-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstaticSketchList.json
More file actions
1 lines (1 loc) · 476 KB
/
staticSketchList.json
File metadata and controls
1 lines (1 loc) · 476 KB
1
[{"params": [{"field": "capacity", "help": "", "type": "int"}], "name": "DeterministicSpaceSaving", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef DeterminsticSpaceSaving_H //must change this MACRO\n#define DeterminsticSpaceSaving_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#define key_len 4\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdint>\n#include <cstdlib>\n#include <cstring>\n#include <unordered_map>\n#include <vector>\n#include <sstream>\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\ndefine: e.g. BOBHash myhash\nsetseed: e.g. myhash.SetSeed(1001)\ncalculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\ntemplate<int keylen> struct KeyNode;\ntemplate<int keylen>\nstruct ValueNode\n{\n\tValueNode<keylen> * prev = NULL;\n\tValueNode<keylen> * next = NULL;\n\tKeyNode<keylen> * first = NULL;\n\tuint32_t val = 0;\n};\n\ntemplate<int keylen>\nstruct KeyNode\n{\n\tValueNode<keylen> * parent = NULL;\n\tKeyNode<keylen> * prev = NULL;\n\tKeyNode<keylen> * next = NULL;\n\tchar key[keylen];\n};\nclass DeterministicSpaceSaving : public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tuint32_t mem_in_bytes;\n\tint capacity;//parameter\n\n\n\tint bytes_per_item = 36 + key_len * 2;\n\ttypedef KeyNode<key_len> SSKeyNode;\n\ttypedef ValueNode<key_len> SSValNode;\n\tSSKeyNode *key_nodes;\n\tSSValNode *val_nodes;\n\n\n\t// SSKeyNode * key_node_pool[capacity];\n\tSSValNode ** val_node_pool;\n\t// int key_node_empty_cnt;\n\tint val_node_empty_cnt;\n\n\tSSValNode * tail_node;\n\n\tunordered_map<string, SSKeyNode *> hash_table;\n\t/*----optional according to your need----*/\n\tvoid append_new_key(const char * key) {\n\t\t// exact first key in tail node\n\t\tSSKeyNode * victim = tail_node->first;\n\t\tconst char * old_key = victim->key;\n\t\thash_table.erase(string(old_key, key_len));\n\t\thash_table[string(key, key_len)] = victim;\n\n\t\tmemcpy(victim->key, key, key_len);\n\t\tadd_counter(victim);\n\t}\n\n\tvoid add_counter(SSKeyNode * my)\n\t{\n\t\t\nif (my->next == my)\n\t\t{\n \n my->parent->val++;\n\t\t\tSSValNode * brother = my->parent->prev;\n if (brother && brother->val == my->parent->val)\n\t\t\t{\n\t\t\t\tval_node_pool[val_node_empty_cnt++] = my->parent; // release my parent\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t // cout << \"release\" << endl;\n\t\t\t\tSSValNode * old_parent = my->parent;\n\t\t\t\tif (old_parent->next)\n\t\t\t\t\told_parent->next->prev = old_parent->prev;\n\t\t\t\tif (old_parent == tail_node)\n\t\t\t\t\ttail_node = old_parent->prev;\n\t\t\t\told_parent->prev->next = old_parent->next;\n\t\t\t\tmy->parent = brother;\n\t\t\t\tmy->next = brother->first;\n\t\t\t\tmy->prev = brother->first->prev;\n\t\t\t\tmy->prev->next = my;\n\t\t\t\tmy->next->prev = my;\n\t\t\t\tbrother->first = my;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n my->next->prev = my->prev;\n\t\t\tmy->prev->next = my->next;\n\t\t\tif (my->parent->first == my) {\n\t\t\t\tmy->parent->first = my->next;\n\t\t\t}\n\t\t\tSSValNode * brother = my->parent->prev;\n\t\t\tif (brother && brother->val == my->parent->val + 1) {\n\n\t\t\t\tmy->parent = brother;\n\t\t\t\tmy->next = brother->first;\n\t\t\t\tmy->prev = brother->first->prev;\n\t\t\t\tmy->prev->next = my;\n\t\t\t\tmy->next->prev = my;\n\t\t\t\tbrother->first = my;\n\t\t\t}\n\t\t\telse {\n\t\t\t\tSSValNode * new_parent = val_node_pool[--val_node_empty_cnt];\n\t\t\t\tSSValNode * old_parent = my->parent;\n\t\t\t\tnew_parent->next = old_parent;\n\t\t\t\tnew_parent->prev = brother;\n\t\t\t\tnew_parent->val = old_parent->val + 1;\n\t\t\t\tnew_parent->first = my;\n\t\t\t\tif (brother)\n\t\t\t\t\tbrother->next = new_parent;\n\t\t\t\told_parent->prev = new_parent;\n\t\t\t\tmy->parent = new_parent;\n\t\t\t\tmy->next = my;\n\t\t\t\tmy->prev = my;\n\t\t\t}\n\t\t}\n\t}\npublic:\n\tusing SketchBase::sketch_name;//DO NOT change this declaration\n\tDeterministicSpaceSaving()\n\t{\n\t\t/*constructed function MUST BT non-parameter!!!*/\n\t\tsketch_name = \"DeterministicSpaceSaving \";//please keep sketch_name the same as class name and .h file name\n\t}\n\tvoid parameterSet(const std::string& parameterName, double parameterValue)\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid init()\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\t//capacity = mem_in_bytes / bytes_per_item;\n\t\tkey_nodes = new SSKeyNode[capacity];\n\t\tval_nodes = new SSValNode[capacity];\n\t\tval_node_pool = new SSValNode*[capacity];\n\t\tmemset(key_nodes, 0, sizeof(SSKeyNode)*capacity);\n memset(val_nodes, 0, sizeof(SSValNode)*capacity);\n\t\t\tval_node_empty_cnt = 0;\n\t\tfor (int i = 1; i < capacity; ++i) {\n\t\t\tval_node_pool[val_node_empty_cnt++] = val_nodes + i;\n\t\t}\n\n\t\ttail_node = val_nodes;\n\t\tval_nodes[0].val = 0;\n\t\tval_nodes->first = key_nodes;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tkey_nodes[i].next = key_nodes + (i + 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].prev = key_nodes + (i - 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].parent = tail_node;\n\t\t}\n\n\t\thash_table.reserve(10 * capacity);\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid Insert(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change parameter type*/\n\n\t\t/*----optional according to your need----*/\n\t\tauto itr = hash_table.find(string(str, len));\n\t\tif (itr == hash_table.end())\n\t\t{\n\t\t\t// key not found\n \n\t\t\tappend_new_key(str);\n\t\t}\n\t\telse\n\t\t{\n\t\t\t// key found\n\t\t\tadd_counter(itr->second);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tint frequencyQuery(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\treturn 0;\n\t\t/*----optional according to your need----*/\n\t}\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tSSValNode * p = tail_node;\n\n\t\twhile (p->prev)\n\t\t\tp = p->prev;\n\n\t\tint i = 0;\n\t\tfor (; p; p = p->next) {\n\t\t\tSSKeyNode * v = p->first;\n\t\t\tdo {\n\t\t\t\ttopkItem.push_back(make_pair(string(v->key, key_len), p->val));\n\t\t\t\t//result[i++].second = p->val;\n\t\t\t\tv = v->next;\n\t\t\t\tif (i == k) {\n\t\t\t\t\treturn topkItem;\n\t\t\t\t}\n\t\t\t} while (v != p->first);\n\t\t}\n\n\t\treturn topkItem;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid reset()\n\t{\n\t\t/*MUST have this function,reset sketch to the initial state */\n\n\t\t/*----optional according to your need----*/\n\t\t//capacity = mem_in_bytes / bytes_per_item;\n\t\tkey_nodes = new SSKeyNode[capacity];\n\t\tval_nodes = new SSValNode[capacity];\n\t\tval_node_pool = new SSValNode*[capacity];\n\t\tmemset(key_nodes, 0, sizeof(SSKeyNode)*capacity);\n memset(val_nodes, 0, sizeof(SSValNode)*capacity);\n\t\t\tval_node_empty_cnt = 0;\n\t\tfor (int i = 1; i < capacity; ++i) {\n\t\t\tval_node_pool[val_node_empty_cnt++] = val_nodes + i;\n\t\t}\n\n\t\ttail_node = val_nodes;\n\t\tval_nodes[0].val = 0;\n\t\tval_nodes->first = key_nodes;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tkey_nodes[i].next = key_nodes + (i + 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].prev = key_nodes + (i - 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].parent = tail_node;\n\t\t}\n\t\thash_table.clear();\n\t\thash_table.reserve(10 * capacity);\n\t\t/*----optional according to your need----*/\n\t}\n\t~DeterministicSpaceSaving()\n\t{\n\t\t/*MUST have this function */\n\n\t\t/*----optional according to your need----*/\n\t\tdelete[] key_nodes;\n\t\tdelete[] val_nodes;\n\t\tdelete[] val_node_pool;\n\t\t/*----optional according to your need----*/\n\t}\n\n\t/*----optional You can add your function----*/\n};\nREGISTER(DeterministicSpaceSaving);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/DeterministicSpaceSaving.h"}, {"params": [{"field": "capacity", "help": "", "type": "int"}], "name": "UnbiasedSpaceSaving", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef UnbiasedSpaceSaving_H //must change this MACRO\n#define UnibiasedSpaceSaving_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#define key_len 4\n/*----optional according to your need----*/\n#include<string>\n#include<random>\n#include<iostream>\n#include<memory.h>\n#include <cstdint>\n#include <cstdlib>\n#include <cstring>\n#include <unordered_map>\n#include <vector>\n#include <sstream>\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\ndefine: e.g. BOBHash myhash\nsetseed: e.g. myhash.SetSeed(1001)\ncalculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase();\nvirtual void parameterSet(const string& parameterName, double parameterValue)=0;\nvirtual init() = 0;\nvirtual void Insert(const uint8_t *str, const int & len) = 0;\nvirtual int frequencyQuery(const uint8_t *str, const int & len) = 0;\nvirtual vector<string> topkQuery(const int & k) = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\ntemplate<int keylen> struct ussKeyNode;\ntemplate<int keylen>\nstruct ussValueNode\n{\n\tussValueNode<keylen> * prev = NULL;\n\tussValueNode<keylen> * next = NULL;\n\tussKeyNode<keylen> * first = NULL;\n\tuint32_t val = 0;\n};\n\ntemplate<int keylen>\nstruct ussKeyNode\n{\n\tussValueNode<keylen> * parent = NULL;\n\tussKeyNode<keylen> * prev = NULL;\n\tussKeyNode<keylen> * next = NULL;\n\tchar key[keylen];\n};\nclass UnbiasedSpaceSaving : public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint capacity;//parameter\n\tuint32_t mem_in_bytes;\n\n\n\n\tint bytes_per_item = 36 + key_len * 2;\n\ttypedef ussKeyNode<key_len> SSKeyNode;\n\ttypedef ussValueNode<key_len> SSValNode;\n\tSSKeyNode *key_nodes;\n\tSSValNode *val_nodes;\n\n\n\t// SSKeyNode * key_node_pool[capacity];\n\tSSValNode ** val_node_pool;\n\t// int key_node_empty_cnt;\n\tint val_node_empty_cnt;\n\n\tSSValNode * tail_node;\n\n\tunordered_map<string, SSKeyNode *> hash_table;\n\t/*----optional according to your need----*/\n\tvoid append_new_key(const char * key) {\n\t\t// exact first key in tail node\n\t\tSSKeyNode * victim = tail_node->first;\n\t\trandom_device rd;\n\t\tif (!(rd() % (tail_node->val + 1)))\n\t\t{\n\t\t\tconst char * old_key = victim->key;\n\t\t\thash_table.erase(string(old_key, key_len));\n\t\t\thash_table[string(key, key_len)] = victim;\n\n\t\t\tmemcpy(victim->key, key, key_len);\n\t\t}\n\t\tadd_counter(victim);\n\t}\n\n\tvoid add_counter(SSKeyNode * my)\n\t{\n\t\tif (my->next == my)\n\t\t{\n\t\t\tmy->parent->val++;\n\t\t\tSSValNode * brother = my->parent->prev;\n\t\t\tif (brother && brother->val == my->parent->val)\n\t\t\t{\n\t\t\t\tval_node_pool[val_node_empty_cnt++] = my->parent; // release my parent\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t // cout << \"release\" << endl;\n\t\t\t\tSSValNode * old_parent = my->parent;\n\t\t\t\tif (old_parent->next)\n\t\t\t\t\told_parent->next->prev = old_parent->prev;\n\t\t\t\tif (old_parent == tail_node)\n\t\t\t\t\ttail_node = old_parent->prev;\n\t\t\t\told_parent->prev->next = old_parent->next;\n\t\t\t\tmy->parent = brother;\n\t\t\t\tmy->next = brother->first;\n\t\t\t\tmy->prev = brother->first->prev;\n\t\t\t\tmy->prev->next = my;\n\t\t\t\tmy->next->prev = my;\n\t\t\t\tbrother->first = my;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\tmy->next->prev = my->prev;\n\t\t\tmy->prev->next = my->next;\n\t\t\tif (my->parent->first == my) {\n\t\t\t\tmy->parent->first = my->next;\n\t\t\t}\n\n\t\t\tSSValNode * brother = my->parent->prev;\n\t\t\tif (brother && brother->val == my->parent->val + 1) {\n\t\t\t\tmy->parent = brother;\n\t\t\t\tmy->next = brother->first;\n\t\t\t\tmy->prev = brother->first->prev;\n\t\t\t\tmy->prev->next = my;\n\t\t\t\tmy->next->prev = my;\n\t\t\t\tbrother->first = my;\n\t\t\t}\n\t\t\telse {\n\t\t\t\tSSValNode * new_parent = val_node_pool[--val_node_empty_cnt];\n\t\t\t\tSSValNode * old_parent = my->parent;\n\t\t\t\tnew_parent->next = old_parent;\n\t\t\t\tnew_parent->prev = brother;\n\t\t\t\tnew_parent->val = old_parent->val + 1;\n\t\t\t\tnew_parent->first = my;\n\t\t\t\tif (brother)\n\t\t\t\t\tbrother->next = new_parent;\n\t\t\t\told_parent->prev = new_parent;\n\t\t\t\tmy->parent = new_parent;\n\t\t\t\tmy->next = my;\n\t\t\t\tmy->prev = my;\n\t\t\t}\n\t\t}\n\t}\npublic:\n\tusing SketchBase::sketch_name;//DO NOT change this declaration\n\tUnbiasedSpaceSaving()\n\t{\n\t\t/*constructed function MUST BT non-parameter!!!*/\n\t\tsketch_name = \"UnbiasedSpaceSaving\";//please keep sketch_name the same as class name and .h file name\n\t}\n\tvoid parameterSet(const std::string& parameterName, double parameterValue)\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid init()\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\t//capacity = mem_in_bytes / bytes_per_item;\n\t\tkey_nodes = new SSKeyNode[capacity];\n\t\tval_nodes = new SSValNode[capacity];\n\t\tval_node_pool = new SSValNode*[capacity];\n\t\tmemset(key_nodes, 0, sizeof(SSKeyNode)*capacity);\n memset(val_nodes, 0, sizeof(SSValNode)*capacity);\n\t\t\tval_node_empty_cnt = 0;\n\t\tfor (int i = 1; i < capacity; ++i) {\n\t\t\tval_node_pool[val_node_empty_cnt++] = val_nodes + i;\n\t\t}\n\n\t\ttail_node = val_nodes;\n\t\tval_nodes[0].val = 0;\n\t\tval_nodes->first = key_nodes;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tkey_nodes[i].next = key_nodes + (i + 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].prev = key_nodes + (i - 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].parent = tail_node;\n\t\t}\n\n\t\thash_table.reserve(10 * capacity);\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid Insert(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change parameter type*/\n\n\t\t/*----optional according to your need----*/\n\t\tauto itr = hash_table.find(string(str, len));\n\t\tif (itr == hash_table.end())\n\t\t{\n\t\t\t// key not found\n\t\t\tappend_new_key(str);\n\t\t}\n\t\telse\n\t\t{\n\t\t\t// key found\n\t\t\tadd_counter(itr->second);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tint frequencyQuery(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\treturn 0;\n\t\t/*----optional according to your need----*/\n\t}\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tSSValNode * p = tail_node;\n\n\t\twhile (p->prev)\n\t\t\tp = p->prev;\n\n\t\tint i = 0;\n\t\tfor (; p; p = p->next) {\n\t\t\tSSKeyNode * v = p->first;\n\t\t\tdo {\n\t\t\t\ttopkItem.push_back(make_pair(string(v->key, key_len), p->val));\n\t\t\t\t//result[i++].second = p->val;\n\t\t\t\tv = v->next;\n\t\t\t\tif (i == k) {\n\t\t\t\t\treturn topkItem;\n\t\t\t\t}\n\t\t\t} while (v != p->first);\n\t\t}\n\n\t\treturn topkItem;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid reset()\n\t{\n\t\t/*MUST have this function,reset sketch to the initial state */\n\n\t\t/*----optional according to your need----*/\n\t\t//capacity = mem_in_bytes / bytes_per_item;\n\t\tkey_nodes = new SSKeyNode[capacity];\n\t\tval_nodes = new SSValNode[capacity];\n\t\tval_node_pool = new SSValNode*[capacity];\n\t\tmemset(key_nodes, 0, sizeof(SSKeyNode)*capacity);\n memset(val_nodes, 0, sizeof(SSValNode)*capacity);\n\t\t\tval_node_empty_cnt = 0;\n\t\tfor (int i = 1; i < capacity; ++i) {\n\t\t\tval_node_pool[val_node_empty_cnt++] = val_nodes + i;\n\t\t}\n\n\t\ttail_node = val_nodes;\n\t\tval_nodes[0].val = 0;\n\t\tval_nodes->first = key_nodes;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tkey_nodes[i].next = key_nodes + (i + 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].prev = key_nodes + (i - 1 + capacity) % capacity;\n\t\t\tkey_nodes[i].parent = tail_node;\n\t\t}\n\t\thash_table.clear();\n\t\thash_table.reserve(10 * capacity);\n\t\t/*----optional according to your need----*/\n\t}\n\t~UnbiasedSpaceSaving()\n\t{\n\t\t/*MUST have this function */\n\n\t\t/*----optional according to your need----*/\n\t\tdelete[] key_nodes;\n\t\tdelete[] val_nodes;\n\t\tdelete[] val_node_pool;\n\t\t/*----optional according to your need----*/\n\t}\n\n\t/*----optional You can add your function----*/\n};\nREGISTER(UnbiasedSpaceSaving);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/UnbiasedSpaceSaving.h"}, {"params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "name": "UnivMon_Count_Heap", "code": "#ifndef UnivMon_Count_Heap_H //must change this MACRO\n#define UnivMon_Count_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool curCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct CountHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCountHeap(int mem_in_bytes_,int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0),capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CountHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tint polar = hash_polar[i]->Run(key, univ_key_len) % 2;\n\n\t\t\tcm_sketch[i][idx] += polar ? 1 : -1;\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\n\t\tsort(ans, ans + d);\n\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(str, len) % w;\n\t\t\tint polar = hash_polar[i]->Run(str, len) % 2;\n\n\t\t\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t\tsort(ans, ans + d);\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CountHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_Count_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity;//parameter\n\ttypedef CountHeap<4, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_Count_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_Count_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\t\tpolar_hash = new BOBHash*[level];\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\tint mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\tsketches[i] = new L2HitterDetector(mem,capacity);\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), curCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_Count_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_Count_Heap);\n#endif//DO NOT change this file", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/UnivMon_Count_Heap.h"}, {"path": "skbm/new_sketch/sketch/UnivMon_CM_Heap.h", "name": "UnivMon_CM_Heap", "params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef UnivMon_CM_Heap_H //must change this MACRO\n#define UnivMon_CM_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool CMcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct CMHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCMHeap(int mem_in_bytes_,int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0),capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CMHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\n\t\tint idx = hash[0]->Run(key, univ_key_len) % w;\n\t\tcm_sketch[0][idx] += 1;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tcm_sketch[i][idx] += 1;\n\t\t\ttmin = min(tmin,cm_sketch[i][idx]);\n\t\t}\n\n\t\t\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, univ_key_len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, univ_key_len) % w;\n\t\t\ttmin = min(tmin, cm_sketch[i][idx]);\n\t\t}\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CMHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_CM_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity;//parameter\n\ttypedef CMHeap<4, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_CM_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_CM_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\t\tpolar_hash = new BOBHash*[level];\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\tint mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\tsketches[i] = new L2HitterDetector(mem,capacity);\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), CMcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_CM_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_CM_Heap);\n#endif//DO NOT change this file"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "UnivMon_CM_CU_Heap", "params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "code": "#ifndef UnivMon_CM_CU_Heap_H //must change this MACRO\n#define UnivMon_CM_CU_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool CMCUHeapcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct CMCUHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCMCUHeap(int mem_in_bytes_,int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0),capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CMCUHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\n\t\tint idx = hash[0]->Run(key, univ_key_len) % w;\n\t\tint tidx = 0;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tif (cm_sketch[i][idx] < tmin)\n\t\t\t{\n\t\t\t\ttmin = cm_sketch[i][idx];\n\t\t\t\ttidx = i;\n\t\t\t}\n\t\t}\n\t\ttmin = ++cm_sketch[tidx][hash[tidx]->Run(key, univ_key_len) % w];\n\n\t\t\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, univ_key_len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, univ_key_len) % w;\n\t\t\ttmin = min(tmin, cm_sketch[i][idx]);\n\t\t}\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CMCUHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_CM_CU_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity;//parameter\n\ttypedef CMCUHeap<4, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_CM_CU_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\t\tpolar_hash = new BOBHash*[level];\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\tint mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\tsketches[i] = new L2HitterDetector(mem,capacity);\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), CMCUHeapcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_CM_CU_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_CM_CU_Heap);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/UnivMon_CM_CU_Heap.h"}, {"code": "#ifndef ColdFilter_CM_H //must change this MACRO\n#define ColdFilter_CM_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/CM.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_CM: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tStreamClassifier<T2, 65> *sc;\n\tCM cm;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_CM()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_CM\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tcm.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tcm.parameterSet(\"hash_num\",3);\n\t\tcm.init();\n\t\tsc->init_spa(&cm);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += cm.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_CM()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_CM);\n#endif//DO NOT change this file", "name": "ColdFilter_CM", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/ColdFilter_CM.h"}, {"code": "#ifndef ColdFilter_Count_H //must change this MACRO\n#define ColdFilter_Count_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/Count.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_Count: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tStreamClassifier<T2, 65> *sc;\n\tCount cm;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_Count()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_Count\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tcm.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tcm.parameterSet(\"hash_num\",3);\n\t\tcm.init();\n\t\tsc->init_spa(&cm);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += cm.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_Count()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_Count);\n#endif//DO NOT change this file", "name": "ColdFilter_Count", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/ColdFilter_Count.h"}, {"code": "#ifndef ColdFilter_Count_Heap_H //must change this MACRO\n#define ColdFilter_Count_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/Count_Heap.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_Count_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tint capacity;\n\tStreamClassifier<T2, 65> *sc;\n\tCount_Heap sketch;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_Count_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tsketch.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tsketch.parameterSet(\"hash_num\",3);\n\t\tsketch.parameterSet(\"capacity\", capacity);\n\t\tsketch.init();\n\t\tsc->init_spa(&sketch);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += sketch.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n return sketch.topkQuery(k);\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_Count_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_Count_Heap);\n#endif//DO NOT change this file", "name": "ColdFilter_Count_Heap", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/ColdFilter_Count_Heap.h"}, {"code": "#ifndef ColdFilter_SpaceSaving_H //must change this MACRO\n#define ColdFilter_SpaceSaving_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/DeterministicSpaceSaving.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_SpaceSaving: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tint capacity;\n\tStreamClassifier<T2, 65> *sc;\n\tDeterministicSpaceSaving sketch;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_SpaceSaving()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_SpaceSaving\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tsketch.parameterSet(\"capacity\", capacity);\n\t\tsketch.init();\n\t\tsc->init_spa(&sketch);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += sketch.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n return sketch.topkQuery(k);\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_SpaceSaving()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_SpaceSaving);\n#endif//DO NOT change this file", "name": "ColdFilter_SpaceSaving", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/ColdFilter_SpaceSaving.h"}, {"params": [{"field": "d", "help": "", "type": "int"}, {"field": "word_num", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}], "name": "Pyramid_Count", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_Count_H //must change this MACRO\n#define Pyramid_Count_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_Count: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size;//parameter\n\tuint64 *counter[60];\n\tbool *flag[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_Count()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_Count\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\n\n\n\n\t\tword_num = word_num * 4.0 / 5.0;\n\t\theap_element_num = 0;\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tflag[i] = new bool[counter_num >> i];\n\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t\tmemset(flag[i], false, sizeof(bool) * (counter_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d * 2; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\t\tint min_value = 1 << 30;\n\t\t\tint value[MAX_HASH_NUM_P], index[MAX_HASH_NUM_P];\n\n\t\t\tint flag_t = 0xFFFF;\n\n\n\t\t\tint word_index, offset, hash_value;\n\n\t\t\thash_value = (bobhash[0]->Run(str, 4));\n\t\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\t\t\n\t\t\thash_value >>= word_index_size;\n\t\t\n\t\t\tfor (int i = 0; i < 2; i++)\n\t\t\t{\n\t\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\t\thash_value >>= counter_index_size;\n\t\t\t}\n\n\t\t\thash_value = (bobhash[1]->Run(str, 4));\n\t\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\t\thash_value >>= word_index_size;\n\t\t\n\t\t\tfor (int i = 2; i < 4; i++)\n\t\t\t{\n\t\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\t\thash_value >>= counter_index_size;\n\t\t\t}\n\t\t\t\n\t\t\tfor (int i = 0; i < d; i++)\n\t\t\t{\n\t\t\t\tword_index = (index[i] >> 4);\n\t\t\t\toffset = (index[i] & 0xF);\n\n\n\t\t\t\tif (((flag_t >> offset) & 1) == 0)\n\t\t\t\t\tcontinue;\n\n\t\t\t\tflag_t &= (~(1 << offset));\n\n\t\t\t\t\n\t\t\t\n\t\t\t\tvalue[i] = (counter[0][word_index] >> (offset << 2)) & 0xF;\n\t\t\t\t\n\t\t\t\tint\tg = (bobhash[i + d]->Run(str,4)) % 2;\n\t\t\t\n\t\t\t\t//++\n\t\t\t\tif (g == 0)\n\t\t\t\t{\n\t\t\t\t\t//posi\n\t\t\t\t\t\n\t\t\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 15)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\t\t\t\t\t\t\tcarry(index[i]);\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t//nega\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 1)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\t\t\t\t\t\t\tflag[0][index[i]] = false;\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t//--\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\t//posi\n\t\t\t\t\n\t\t\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 0)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t\tflag[0][index[i]] = true;\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 15)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\n\t\t\t\t\t\t\tdown_carry(index[i]);\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\n\t\t\t\n\t\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint temp, temp2;\n\t\tint res[MAX_HASH_NUM], value[MAX_HASH_NUM], index[MAX_HASH_NUM];\n\t\tint flag_t = 0xFFFF;\n\t\tint hash_value;\n\n\t\tint word_index, offset;\n\t\thash_value = (bobhash[0]->Run(str, 4));\n\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < 2; i++)\n\t\t{\n\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\thash_value >>= counter_index_size;\n\t\t}\n\n\t\thash_value = (bobhash[1]->Run(str,4));\n\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 2; i < 4; i++)\n\t\t{\n\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\thash_value >>= counter_index_size;\n\t\t}\n\n\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tword_index = (index[i] >> 4);\n\t\t\toffset = (index[i] & 0xF);\n\n\n\t\t\tvalue[i] = (counter[0][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tint\tg = (bobhash[i + d]->Run(str, 4)) % 2;\n\n\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\ttemp = value[i] + get_value(index[i]);\n\t\t\telse\n\t\t\t\ttemp = 0 - value[i] + get_value(index[i]);\n\n\t\t\tres[i] = (g == 0 ? temp : -temp);\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tint r;\n\t\tif (d % 2 == 0)\n\t\t{\n\t\t\tr = (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tr = res[d / 2];\n\t\t}\n\t\treturn r;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\t\tint counter_index;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter_index = (word_index << 4) + offset;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0x3;\n\n\t\t\t//posi\n\t\t\tif (flag[i][counter_index] == false)\n\t\t\t{\n\t\t\t\tif (value == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\tflag[i][counter_index] = true;\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t\t//nega\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (value == 3)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\t\tint counter_index;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter_index = (word_index << 4) + offset;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0x3;\n\n\t\t\t//posi\n\t\t\tif (flag[i][counter_index] == false)\n\t\t\t{\n\t\t\t\tif (value == 3)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t\t//nega\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (value == 1)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\n\t\t\t\t\tflag[i][counter_index] = false;\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\tint t = ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\n\t\t\thigh_value += (flag[i][(word_index << 4) + offset] == false) ? t : -t;\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_Count()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_Count);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/Pyramid_Count.h"}, {"params": [{"field": "d", "help": "", "type": "int"}, {"field": "word_num", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}], "name": "Pyramid_CM", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_CM_H //must change this MACRO\n#define Pyramid_CM_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_CM: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size,capacity;//parameter\n\n\tuint64 *counter[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_CM()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_CM\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\n\n\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tint flag = 0xFFFF;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\n\t\t\tif (((flag >> counter_offset[i]) & 1) == 0)\n\t\t\t\tcontinue;\n\n\t\t\tflag &= (~(1 << counter_offset[i]));\n\n\t\t\tif (value[i] == 15)\n\t\t\t{\n\t\t\t\tcounter[0][my_word_index] &= (~((uint64)0xF << (counter_offset[i] << 2)));\n\t\t\t\tcarry(index[i]);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[0][my_word_index] += ((uint64)0x1 << (counter_offset[i] << 2));\n\t\t\t}\n\t\t}\n\t\n\t\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tvalue[i] += get_value(index[i]);\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\n\n\t\treturn min_value;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right, up_left_or_right;\n\n\t\tint value, up_value;\n\t\tint word_index = index >> 4, up_word_index;\n\t\tint offset = index & 0xF;\n\t\tint up_offset = offset;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tup_word_index = (word_index >> 1);\n\t\t\tup_left_or_right = up_word_index & 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) >= 2)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse if ((value & 3) == 1)\n\t\t\t{\n\t\t\t\tup_value = (counter[i + 1][up_word_index] >> (up_offset << 2)) & 0xF;\n\n\t\t\t\t//change this layer's flag bit;\n\t\t\t\tif (((up_value >> (2 + up_left_or_right)) & 1) == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x1 << (2 + left_or_right + (offset << 2))));\n\t\t\t\t}\n\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[i][word_index] |= ((uint64)0x3 << (offset << 2));\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) != 3)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\thigh_value += ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_CM()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_CM);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/Pyramid_CM.h"}, {"params": [{"field": "d", "help": "", "type": "int"}, {"field": "word_num", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}], "name": "Pyramid_CM_CU", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_CM_CU_H //must change this MACRO\n#define Pyramid_CM_CU_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_CM_CU: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size,capacity;//parameter\n\n\tuint64 *counter[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_CM_CU()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_CM_CU\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\n\n\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\n\t\tif (min_value != 15)\n\t\t{\n\t\t\tfor (int i = 0; i < d; i++)\n\t\t\t{\n\t\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\t\tcounter[0][my_word_index] += value[i] == min_value ? ((uint64)0x1 << (counter_offset[i] << 2)) : 0;\n\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\n\t\t\tif (value[i] == 0)\n\t\t\t\tcontinue;\n\n\t\t\tcounter[0][my_word_index] &= (~((uint64)0xF << (counter_offset[i] << 2)));\n\t\t\tcarry(index[i]);\n\t\t}\n\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tvalue[i] += get_value(index[i]);\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\t\treturn min_value;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right, up_left_or_right;\n\n\t\tint value, up_value;\n\t\tint word_index = index >> 4, up_word_index;\n\t\tint offset = index & 0xF;\n\t\tint up_offset = offset;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tup_word_index = (word_index >> 1);\n\t\t\tup_left_or_right = up_word_index & 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) >= 2)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse if ((value & 3) == 1)\n\t\t\t{\n\t\t\t\tup_value = (counter[i + 1][up_word_index] >> (up_offset << 2)) & 0xF;\n\n\t\t\t\t//change this layer's flag bit;\n\t\t\t\tif (((up_value >> (2 + up_left_or_right)) & 1) == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x1 << (2 + left_or_right + (offset << 2))));\n\t\t\t\t}\n\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[i][word_index] |= ((uint64)0x3 << (offset << 2));\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) != 3)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\thigh_value += ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_CM_CU()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_CM_CU);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/Pyramid_CM_CU.h"}, {"params": [{"field": "hash_num", "help": "", "type": "int"}, {"field": "memory_in_bytes", "help": "", "type": "int"}], "name": "Count", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Count_H //must change this MACRO\n#define Count_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <algorithm>\n#include <sstream>\n#include <cstdio>\n#include <cstdlib>\n#include <cstring>\n#include <time.h>\nusing std::min;\nusing std::swap;\n#define SQR(X) (X) * (X)\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Count: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint memory_in_bytes;//parameter\n\tint hash_num;//parameter\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint w;\n\tint ** c_sketch;\n\tBOBHash * hash;//optional DSAB-builtin hashfunction\n\tBOBHash * hash_polar;\n /*----optional according to your need----*/\n\t\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Count()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Count\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"hash_num\")\n {\n\n hash_num = parameterValue;\n return;\n }\n if (parameterName==\"memory_in_bytes\")\n {\n memory_in_bytes = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = memory_in_bytes / 4 / hash_num;\n\n c_sketch = new int*[hash_num];\n hash = new BOBHash[hash_num];\n\t\thash_polar = new BOBHash[hash_num];\n\t\tsrand(time(0));\n for (int i = 0; i<hash_num; ++i)\n {\n\t\t\t\n\t\t\thash[i].SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i].SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t\tc_sketch[i] = new int[w];\n\t\t\tmemset(c_sketch[i], 0, sizeof(int)*w);\n }\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint ans[hash_num];\n\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str, len) % w;\n\t\t\tint polar = hash_polar[i].Run(str, len) % 2;\n\t\t\tc_sketch[i][idx] += polar ? 1 : -1;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char * str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ans[1000];\n\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str,len) % w;\n\t\t\tint polar = hash_polar[i].Run(str, len) % 2;\n\n\t\t\tint val = c_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\n\t\tsort(ans, ans + hash_num);\n\n\t\tint tmin;\n\t\tif (hash_num % 2 == 0) {\n\t\t\ttmin = (ans[hash_num / 2] + ans[hash_num / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[hash_num / 2];\n\t\t}\n\t\treturn (tmin <= 1) ? 1 : tmin;\n\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n for (int i; i<hash_num; ++i)\n {\n memset(c_sketch[i],0,sizeof(int)*w);\n }\n /*----optional according to your need----*/\n }\n ~Count()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<hash_num; ++i)\n {\n delete [] c_sketch[i];\n }\n delete [] c_sketch;\n delete [] hash;\n\t\tdelete [] hash_polar;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(Count);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/Count.h"}, {"params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}], "name": "UnivMon_Count", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef UnivMon_Count_H //must change this MACRO\n#define UnivMon_Count_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool countcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int capacity, int d = 3>\nstruct CountSketch {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK heap[capacity];\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCountSketch(int mem_in_bytes_) : mem_in_bytes(mem_in_bytes_), heap_element_num(0) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CountSketch@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tint polar = hash_polar[i]->Run(key, univ_key_len) % 2;\n\n\t\t\tcm_sketch[i][idx] += polar ? 1 : -1;\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(str, len) % w;\n\t\t\tint polar = hash_polar[i]->Run(str, len) % 2;\n\n\t\t\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t\tsort(ans, ans + d);\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CountSketch() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_Count: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity=1000;\n\ttypedef CountSketch<4, 1000, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_Count()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_Count\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\n\t\tpolar_hash = new BOBHash*[level];\n\t\t\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\t//int mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem_for_sk = int(mem_in_bytes);\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\n\t\t\tsketches[i] = new L2HitterDetector(mem);\n\t\t\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\t\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\t\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), countcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_Count()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_Count);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/UnivMon_Count.h"}, {"params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}], "name": "UnivMon_CM", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef UnivMon_CM_H //must change this MACRO\n#define UnivMon_CM_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool cmcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int capacity, int d = 3>\nstruct CMSketch {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK heap[capacity];\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCMSketch(int mem_in_bytes_) : mem_in_bytes(mem_in_bytes_), heap_element_num(0) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CMSketch@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\t\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tcm_sketch[i][idx] += 1;\n\n\t\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, len) % w;\n\t\t\ttmin= min(cm_sketch[i][idx],tmin);\n\t\t}\n\t\t\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CMSketch() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_CM: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity=1000;\n\ttypedef CMSketch<4, 1000, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_CM()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_CM\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\t\tpolar_hash = new BOBHash*[level];\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\t//int mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem_for_sk = int(mem_in_bytes);\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\tsketches[i] = new L2HitterDetector(mem);\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), cmcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_CM()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_CM);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/UnivMon_CM.h"}, {"params": [{"field": "mem_in_bytes", "help": "", "type": "int"}, {"field": "level", "help": "", "type": "int"}], "name": "UnivMon_CM_CU", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef UnivMon_CM_CU_H //must change this MACRO\n#define UnivMon_CM_CU_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\n/*----optional according to your need----*/\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\nbool CMCUcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\ntemplate<uint8_t univ_key_len, int capacity, int d = 3>\nstruct CMCUSketch {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK heap[capacity];\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tCMCUSketch(int mem_in_bytes_) : mem_in_bytes(mem_in_bytes_), heap_element_num(0) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"CMCUSketch@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\tint idx = hash[0]->Run(key, univ_key_len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tint tidx = 0;\n\t\t\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tif (tmin > cm_sketch[i][idx])\n\t\t\t{\n\t\t\t\ttmin =cm_sketch[i][idx];\n\t\t\t\ttidx = i;\n\t\t\t}\n\t\n\t\t}\n\t\tcm_sketch[tidx][hash[tidx]->Run(key, univ_key_len) % w]++;\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, len) % w;\n\t\t\ttmin= min(cm_sketch[i][idx],tmin);\n\t\t}\n\t\t\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~CMCUSketch() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\treturn;\n\t}\n};\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<univ_key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass UnivMon_CM_CU: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint mem_in_bytes;//parameter\n\tint level;//parameter\n\tint capacity=1000;\n\ttypedef CMCUSketch<4, 1000, 3> L2HitterDetector;\n\n L2HitterDetector ** sketches;\n BOBHash ** polar_hash;\n int element_num = 0;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n UnivMon_CM_CU()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"UnivMon_CM_CU\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"mem_in_bytes\")\n {\n\n\t\t\tmem_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"level\")\n {\n\t\t\tlevel = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t//srand(time(0));\n\t\tdouble total = (1u << level) - 1;\n\t\tsketches = new L2HitterDetector*[level];\n\t\tpolar_hash = new BOBHash*[level];\n\t\tfor (int i = 0; i < level; ++i) {\n\t\t\t//int mem_for_sk = int(mem_in_bytes) - level * (4 + 4) * capacity;\n\t\t\tint mem_for_sk = int(mem_in_bytes);\n\t\t\tint mem = int(mem_for_sk / level);\n\t\t\tsketches[i] = new L2HitterDetector(mem);\n\t\t\tauto idx = uint32_t(rand() % MAX_PRIME32);\n\t\t\tpolar_hash[i] = new BOBHash;\n\t\t\tpolar_hash[i]->SetSeed(idx);\n\t\t}\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\telement_num++;\n\t\tsketches[0]->insert(str);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tsketches[i]->insert(str);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint polar;\n\t\tsketches[0]->insert(str);\n\t\tint res = sketches[0]->frequencyQuery(str, len);\n\t\tfor (int i = 1; i < level; ++i) {\n\t\t\tpolar = ((polar_hash[i]->Run(str, len))) % 2;\n\t\t\t// cout << polar << endl;\n\t\t\tif (polar) {\n\t\t\t\tres = min(res, sketches[i]->frequencyQuery(str, len));\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\treturn res;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunordered_map<std::string, int> results;\n\t\tvector<std::pair<std::string, int>> vec_top_k(k);\n\t\tfor (int i = level - 1; i >= 0; --i) {\n\t\t\tsketches[i]->get_top_k_with_frequency(k, vec_top_k);\n\t\t\tfor (auto kv : vec_top_k) {\n\t\t\t\tif (results.find(kv.first) == results.end()) {\n\t\t\t\t\tresults[kv.first] = kv.second;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : results) {\n\t\t\t\n\t\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\tsort(curItem.begin(), curItem.end(), CMCUcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\telement_num = 0;\n /*----optional according to your need----*/\n }\n ~UnivMon_CM_CU()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<level; ++i)\n {\n\t\t\tdelete sketches[i];\n\t\t\tdelete polar_hash[i];\n }\n delete [] sketches;\n delete [] polar_hash;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(UnivMon_CM_CU);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/UnivMon_CM_CU.h"}, {"path": "skbm/new_sketch/sketch/Count_Heap.h", "name": "Count_Heap", "params": [{"field": "capacity", "help": "", "type": "int"}, {"field": "memory_in_bytes", "help": "", "type": "int"}, {"field": "hash_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef CountHeap_H //must change this MACRO\n#define CountHeap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <algorithm>\n#include <sstream>\n#include <cstdio>\n#include <cstdlib>\n#include <cstring>\n#include <time.h>\nusing std::min;\nusing std::swap;\n#define SQR(X) (X) * (X)\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Count_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint capacity;//parameter\n\tint memory_in_bytes;//parameter\n\tint hash_num;//parameter\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tint w;\n\tint ** c_sketch;\n\tBOBHash * hash;//optional DSAB-builtin hashfunction\n\tBOBHash * hash_polar;\n\tunordered_map<string, uint32_t> ht;\n /*----optional according to your need----*/\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Count_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Count_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"hash_num\")\n {\n\n hash_num = parameterValue;\n return;\n }\n if (parameterName==\"memory_in_bytes\")\n {\n memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"capacity\")\n {\n capacity = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tint sketchMem = memory_in_bytes - capacity * 8;\n\t\tw = sketchMem / 4 / hash_num;\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\theap_element_num = 0;\n c_sketch = new int*[hash_num];\n hash = new BOBHash[hash_num];\n\t\thash_polar = new BOBHash[hash_num];\n\t\tsrand(time(0));\n for (int i = 0; i<hash_num; ++i)\n {\n\t\t\t\n\t\t\thash[i].SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i].SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t\tc_sketch[i] = new int[w];\n\t\t\tmemset(c_sketch[i], 0, sizeof(int)*w);\n }\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint ans[1000];\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str, len) % w;\n\t\t\tint polar = hash_polar[i].Run(str, len) % 2;\n\n\t\t\tc_sketch[i][idx] += polar ? 1 : -1;\n\n\t\t\tint val = c_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\n\t\tsort(ans, ans + hash_num);\n\n\t\tint tmin;\n\t\tif (hash_num % 2 == 0) {\n\t\t\ttmin = (ans[hash_num / 2] + ans[hash_num / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[hash_num / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\t\tstring str_key = string(str, len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char * str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ans[hash_num];\n\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str,len) % w;\n\t\t\tint polar = hash_polar[i].Run(str, len) % 2;\n\n\t\t\tint val = c_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\n\t\tsort(ans, ans + hash_num);\n\n\t\tint tmin;\n\t\tif (hash_num % 2 == 0) {\n\t\t\ttmin = (ans[hash_num / 2] + ans[hash_num / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[hash_num / 2];\n\t\t}\n\t\treturn (tmin <= 1) ? 1 : tmin;\n\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\t\t\n\t\t}\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n for (int i; i<hash_num; ++i)\n {\n memset(c_sketch[i],0,sizeof(int)*w);\n }\n\t\tmemset(heap, 0, sizeof(VK)*capacity);\n\t\theap_element_num = 0;\n /*----optional according to your need----*/\n }\n ~Count_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n for (int i; i<hash_num; ++i)\n {\n delete [] c_sketch[i];\n }\n delete [] c_sketch;\n delete [] hash;\n\t\tdelete [] hash_polar;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(Count_Heap);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/Pyramid_Count_Heap.h", "name": "Pyramid_Count_Heap", "params": [{"field": "capacity", "help": "", "type": "int"}, {"field": "word_num", "help": "", "type": "int"}, {"field": "d", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_Count_Heap_H //must change this MACRO\n#define Pyramid_Count_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_Count_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size,capacity;//parameter\n\tuint64 *counter[60];\n\tbool *flag[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_Count_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_Count_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\n\n\t\theap_element_num = 0;\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tflag[i] = new bool[counter_num >> i];\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t\tmemset(flag[i], false, sizeof(bool) * (counter_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d * 2; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\t\tint min_value = 1 << 30;\n\t\t\tint value[MAX_HASH_NUM_P], index[MAX_HASH_NUM_P];\n\n\t\t\tint flag_t = 0xFFFF;\n\n\n\t\t\tint word_index, offset, hash_value;\n\n\t\t\thash_value = (bobhash[0]->Run(str, 4));\n\t\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\t\t\n\t\t\thash_value >>= word_index_size;\n\t\t\n\t\t\tfor (int i = 0; i < 2; i++)\n\t\t\t{\n\t\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\t\thash_value >>= counter_index_size;\n\t\t\t}\n\n\t\t\thash_value = (bobhash[1]->Run(str, 4));\n\t\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\t\thash_value >>= word_index_size;\n\t\t\n\t\t\tfor (int i = 2; i < 4; i++)\n\t\t\t{\n\t\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\t\thash_value >>= counter_index_size;\n\t\t\t}\n\t\t\t\n\t\t\tfor (int i = 0; i < d; i++)\n\t\t\t{\n\t\t\t\tword_index = (index[i] >> 4);\n\t\t\t\toffset = (index[i] & 0xF);\n\n\n\t\t\t\tif (((flag_t >> offset) & 1) == 0)\n\t\t\t\t\tcontinue;\n\n\t\t\t\tflag_t &= (~(1 << offset));\n\n\t\t\t\t\n\t\t\t\n\t\t\t\tvalue[i] = (counter[0][word_index] >> (offset << 2)) & 0xF;\n\t\t\t\t\n\t\t\t\tint\tg = (bobhash[i + d]->Run(str,4)) % 2;\n\t\t\t\n\t\t\t\t//++\n\t\t\t\tif (g == 0)\n\t\t\t\t{\n\t\t\t\t\t//posi\n\t\t\t\t\t\n\t\t\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 15)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\t\t\t\t\t\t\tcarry(index[i]);\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t//nega\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 1)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\t\t\t\t\t\t\tflag[0][index[i]] = false;\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t//--\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\t//posi\n\t\t\t\t\n\t\t\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 0)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t\tflag[0][index[i]] = true;\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tif (value[i] == 15)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] &= (~((uint64)0xF << (offset << 2)));\n\n\t\t\t\t\t\t\tdown_carry(index[i]);\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tcounter[0][word_index] += ((uint64)0x1 << (offset << 2));\n\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\n\t\t\tint tmin = frequencyQuery(str, len);\n\t\t\tstring str_key = string(str, len);\n\t\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\t\theap[ht[str_key]].first++;\n\t\t\t\theap_adjust_down(ht[str_key]);\n\t\t\t}\n\t\t\telse if (heap_element_num < capacity) {\n\t\t\t\theap[heap_element_num].second = str_key;\n\t\t\t\theap[heap_element_num].first = tmin;\n\t\t\t\tht[str_key] = heap_element_num++;\n\t\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t\t}\n\t\t\telse if (tmin > heap[0].first) {\n\t\t\t\tVK & kv = heap[0];\n\t\t\t\tht.erase(kv.second);\n\t\t\t\tkv.second = str_key;\n\t\t\t\tkv.first = tmin;\n\t\t\t\tht[str_key] = 0;\n\t\t\t\theap_adjust_down(0);\n\t\t\t}\n\t\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint temp, temp2;\n\t\tint res[MAX_HASH_NUM], value[MAX_HASH_NUM], index[MAX_HASH_NUM];\n\t\tint flag_t = 0xFFFF;\n\t\tint hash_value;\n\n\t\tint word_index, offset;\n\t\thash_value = (bobhash[0]->Run(str, 4));\n\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < 2; i++)\n\t\t{\n\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\thash_value >>= counter_index_size;\n\t\t}\n\n\t\thash_value = (bobhash[1]->Run(str,4));\n\t\tword_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 2; i < 4; i++)\n\t\t{\n\t\t\toffset = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = (word_index << counter_index_size) + offset;\n\n\t\t\thash_value >>= counter_index_size;\n\t\t}\n\n\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tword_index = (index[i] >> 4);\n\t\t\toffset = (index[i] & 0xF);\n\n\n\t\t\tvalue[i] = (counter[0][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tint\tg = (bobhash[i + d]->Run(str, 4)) % 2;\n\n\t\t\tif (flag[0][index[i]] == false)\n\t\t\t\ttemp = value[i] + get_value(index[i]);\n\t\t\telse\n\t\t\t\ttemp = 0 - value[i] + get_value(index[i]);\n\n\t\t\tres[i] = (g == 0 ? temp : -temp);\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tint r;\n\t\tif (d % 2 == 0)\n\t\t{\n\t\t\tr = (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t\telse\n\t\t{\n\t\t\tr = res[d / 2];\n\t\t}\n\t\treturn r;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\t\tint counter_index;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter_index = (word_index << 4) + offset;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0x3;\n\n\t\t\t//posi\n\t\t\tif (flag[i][counter_index] == false)\n\t\t\t{\n\t\t\t\tif (value == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\tflag[i][counter_index] = true;\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t\t//nega\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (value == 3)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\t\tint counter_index;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter_index = (word_index << 4) + offset;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0x3;\n\n\t\t\t//posi\n\t\t\tif (flag[i][counter_index] == false)\n\t\t\t{\n\t\t\t\tif (value == 3)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t\t//nega\n\t\t\telse\n\t\t\t{\n\t\t\t\tif (value == 1)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\n\t\t\t\t\tflag[i][counter_index] = false;\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\tint t = ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\n\t\t\thigh_value += (flag[i][(word_index << 4) + offset] == false) ? t : -t;\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_Count_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_Count_Heap);\n#endif//DO NOT change this file"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_Count_Heap", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "code": "#ifndef ASketch_Count_Heap_H //must change this MACRO\n#define ASketch_Count_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\n#ifndef myCountHeap_H\n#define myCountHeap_H\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct myCountHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tmyCountHeap(int mem_in_bytes_, int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0), capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"myCountHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tint polar = hash_polar[i]->Run(key, univ_key_len) % 2;\n\n\t\t\tcm_sketch[i][idx] += polar ? 1 : -1;\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\n\t\tsort(ans, ans + d);\n\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(str, len) % w;\n\t\t\tint polar = hash_polar[i]->Run(str, len) % 2;\n\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t\tsort(ans, ans + d);\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\t\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~myCountHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n#endif\n\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_Count_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCountHeap<4,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_Count_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_Count_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n if (parameterName==\"capacity\")\n {\n capacity = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\tw = w - capacity * 8;\n\t\tsketch = new myCountHeap<4, 3>(w, capacity);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\t\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > sketchTopK(k);\n\t\tstd::unordered_map<std::string, int> allTopk;\n\t\n\t\tsketch->get_top_k_with_frequency(k, sketchTopK);\n\t\t\n\t\tfor (int i = 0; i < sketchTopK.size(); ++i)\n\t\t{\n\t\t\tallTopk[sketchTopK[i].first] = sketchTopK[i].second;\n\t\t}\n\t\t\n\t\tfor (int i = 0; i < cur_pos; ++i)\n\t\t{\n\t\t\tchar str[4];\n\t\t\tmemcpy(str, &items[i], 4);\n\t\t\tstring key(str, 4);\n\t\t\tallTopk[key] = new_count[i];\n\t\t}\n\t\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : allTopk)\n\t\t{\n\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\n\t\tsort(curItem.begin(), curItem.end(), AcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\t\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCountHeap<4, 3>(w, capacity);\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_Count_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_Count_Heap);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_Count_Heap.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_CM_Heap", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "code": "#ifndef ASketch_CM_Heap_H //must change this MACRO\n#define ASketch_CM_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\n#ifndef myCMHeap_H\n#define myCMHeap_H\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct myCMHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tmyCMHeap(int mem_in_bytes_, int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0), capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"myCMHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\tvoid insert(const char * key) {\n\n\t\tint idx = hash[0]->Run(key, univ_key_len) % w;\n\t\tcm_sketch[0][idx] += 1;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tcm_sketch[i][idx] += 1;\n\t\t\ttmin = min(tmin, cm_sketch[i][idx]);\n\t\t}\n\n\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, univ_key_len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, univ_key_len) % w;\n\t\t\ttmin = min(tmin, cm_sketch[i][idx]);\n\t\t}\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~myCMHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n#endif\n\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_CM_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCMHeap<4,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_CM_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_CM_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n if (parameterName==\"capacity\")\n {\n capacity = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\tw = w - capacity * 8;\n\t\tsketch = new myCMHeap<4, 3>(w, capacity);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > sketchTopK(k);\n\t\tstd::unordered_map<std::string, int> allTopk;\n\t\n\t\tsketch->get_top_k_with_frequency(k, sketchTopK);\n\t\t\n\t\tfor (int i = 0; i < sketchTopK.size(); ++i)\n\t\t{\n\t\t\tallTopk[sketchTopK[i].first] = sketchTopK[i].second;\n\t\t}\n\t\n\t\tfor (int i = 0; i < cur_pos; ++i)\n\t\t{\n\t\t\tchar str[4];\n\t\t\tmemcpy(str, &items[i], 4);\n\t\t\tstring key(str, 4);\n\t\t\tallTopk[key] = new_count[i];\n\t\t}\n\t\t\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : allTopk)\n\t\t{\n\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\t\n\t\tsort(curItem.begin(), curItem.end(), AcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCMHeap<4, 3>(w, capacity);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_CM_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_CM_Heap);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_CM_Heap.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_CM_CU_Heap", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "code": "#ifndef ASketch_CM_CU_Heap_H //must change this MACRO\n#define ASketch_CM_CU_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\n#ifndef myCMCUHeap_H\n#define myCMCUHeap_H\ntemplate<uint8_t univ_key_len, int d = 3>\nstruct myCMCUHeap {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tint capacity;\n\tVK *heap;\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tmyCMCUHeap(int mem_in_bytes_, int capacity__) : mem_in_bytes(mem_in_bytes_), heap_element_num(0), capacity(capacity__) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\theap = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"myCMCUHeap@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\n\t\tint idx = hash[0]->Run(key, univ_key_len) % w;\n\t\tint tidx = 0;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tif (cm_sketch[i][idx] < tmin)\n\t\t\t{\n\t\t\t\ttmin = cm_sketch[i][idx];\n\t\t\t\ttidx = i;\n\t\t\t}\n\t\t}\n\t\ttmin = ++cm_sketch[tidx][hash[tidx]->Run(key, univ_key_len) % w];\n\n\n\n\t\tstring str_key = string(key, univ_key_len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint idx = hash[0]->Run(str, univ_key_len) % w;\n\t\tint tmin = cm_sketch[0][idx];\n\t\tfor (int i = 1; i < d; ++i) {\n\t\t\tidx = hash[i]->Run(str, univ_key_len) % w;\n\t\t\ttmin = min(tmin, cm_sketch[i][idx]);\n\t\t}\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~myCMCUHeap() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\tdelete[] heap;\n\t\treturn;\n\t}\n};\n#endif // !myCMCUHeap_H\n\n\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_CM_CU_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCMCUHeap<4,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_CM_CU_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n if (parameterName==\"capacity\")\n {\n capacity = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\tw = w - capacity * 8;\n\t\tsketch = new myCMCUHeap<4, 3>(w, capacity);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > sketchTopK(k);\n\t\tstd::unordered_map<std::string, int> allTopk;\n\t\n\t\tsketch->get_top_k_with_frequency(k, sketchTopK);\n\t\t\n\t\tfor (int i = 0; i < sketchTopK.size(); ++i)\n\t\t{\n\t\t\tallTopk[sketchTopK[i].first] = sketchTopK[i].second;\n\t\t}\n\t\n\t\tfor (int i = 0; i < cur_pos; ++i)\n\t\t{\n\t\t\tchar str[4];\n\t\t\tmemcpy(str, &items[i], 4);\n\t\t\tstring key(str, 4);\n\t\t\tallTopk[key] = new_count[i];\n\t\t}\n\t\t\n\t\tstd::vector<std::pair <std::string, int> > curItem;\n\t\tfor (auto & kv : allTopk)\n\t\t{\n\t\t\tcurItem.emplace_back(kv);\n\t\t}\n\t\t\n\t\tsort(curItem.begin(), curItem.end(), AcurCMP);\n\t\tint t = curItem.size() > k ? k : curItem.size();\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(curItem[i]);\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCMCUHeap<4, 3>(w, capacity);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_CM_CU_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_CM_CU_Heap);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_CM_CU_Heap.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_CM", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}], "code": "#ifndef ASketch_CM_H //must change this MACRO\n#define ASketch_CM_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../atomSketch/myCMSketch.h\" //If you want to use DSAB-builtin sketch must include this\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_CM: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCMSketch<4,1000,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_CM()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_CM\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\t\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\t\n\t\tsketch = new myCMSketch<4,1000, 3>(w);\n\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\t\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\t\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\t\t\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCMSketch<4,1000, 3>(w);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_CM()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_CM);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_CM.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_CM_CU", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}], "code": "#ifndef ASketch_CM_CU_H //must change this MACRO\n#define ASketch_CM_CU_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../atomSketch/myCMCUSketch.h\" //If you want to use DSAB-builtin sketch must include this\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_CM_CU: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCMCUSketch<4,1000,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_CM_CU()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_CM_CU\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\tsketch = new myCMCUSketch<4,1000, 3>(w);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\t\t\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCMCUSketch<4,1000, 3>(w);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_CM_CU()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_CM_CU);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_CM_CU.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "ASketch_Count", "params": [{"field": "tot_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_size", "help": "", "type": "int"}], "code": "#ifndef ASketch_Count_H //must change this MACRO\n#define ASketch_Count_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <cstring>\n#include <algorithm> \n#include <x86intrin.h>\n#include <bmiintrin.h>\n#include<memory.h>\n#include <cstdlib>\n#include <vector>\n#include <ctime>\n#include <cmath>\n#include <sstream>\nusing namespace std;\n\n#ifndef ACURCMP\n#define ACURCMP\nbool AcurCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\n#endif // ACURCMP\n\n\n#define MAX_HASH_NUM 16\n#ifndef _PARAMS_H\n#define _PARAMS_H\n\n#include <cstdint>\n\n//#define MAX_INSERT_PACKAGE 32000000\n//#define MAX_HASH_NUM 20\n#define STR_LEN 8\n\ntypedef const unsigned char cuc;\ntypedef unsigned int(*funPtr)(const unsigned char *, unsigned int);\n\nstruct HashFunction {\n\tHashFunction();\n\tstatic uint32_t Str2Int(cuc *str, uint32_t hidx, uint32_t len = STR_LEN);\n\tstatic funPtr hash_function[14];\n};\n\n#define mix(a, b, c) \\\n{ \\\n a -= b; a -= c; a ^= (c>>13); \\\n b -= c; b -= a; b ^= (a<<8); \\\n c -= a; c -= b; c ^= (b>>13); \\\n a -= b; a -= c; a ^= (c>>12); \\\n b -= c; b -= a; b ^= (a<<16); \\\n c -= a; c -= b; c ^= (b>>5); \\\n a -= b; a -= c; a ^= (c>>3); \\\n b -= c; b -= a; b ^= (a<<10); \\\n c -= a; c -= b; c ^= (b>>15); \\\n}\n\nuint32_t BOB1(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 2;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t BOB2(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 31;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t\nBOB3(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 73;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\nuint32_t\nBOB4(cuc *str, uint32_t len) {\n\t//register ub4 a,b,c,len;\n\tuint32_t a, b, c;\n\tuint32_t initval = 127;\n\t/* Set up the internal state */\n\t//len = length;\n\ta = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */\n\tc = initval; /* the previous hash value */\n\n\t\t\t\t\t\t /*---------------------------------------- handle most of the key */\n\twhile (len >= 12) {\n\t\ta += (str[0] + ((uint32_t)str[1] << 8) + ((uint32_t)str[2] << 16) + ((uint32_t)str[3] << 24));\n\t\tb += (str[4] + ((uint32_t)str[5] << 8) + ((uint32_t)str[6] << 16) + ((uint32_t)str[7] << 24));\n\t\tc += (str[8] + ((uint32_t)str[9] << 8) + ((uint32_t)str[10] << 16) + ((uint32_t)str[11] << 24));\n\t\tmix(a, b, c);\n\t\tstr += 12;\n\t\tlen -= 12;\n\t}\n\n\t/*------------------------------------- handle the last 11 bytes */\n\tc += len;\n\tswitch (len) /* all the case statements fall through */ {\n\tcase 11: c += ((uint32_t)str[10] << 24);\n\tcase 10: c += ((uint32_t)str[9] << 16);\n\tcase 9: c += ((uint32_t)str[8] << 8);\n\t\t/* the first byte of c is reserved for the length */\n\tcase 8: b += ((uint32_t)str[7] << 24);\n\tcase 7: b += ((uint32_t)str[6] << 16);\n\tcase 6: b += ((uint32_t)str[5] << 8);\n\tcase 5: b += str[4];\n\tcase 4: a += ((uint32_t)str[3] << 24);\n\tcase 3: a += ((uint32_t)str[2] << 16);\n\tcase 2: a += ((uint32_t)str[1] << 8);\n\tcase 1: a += str[0];\n\t\t/* case 0: nothing left to add */\n\t}\n\tmix(a, b, c);\n\t/*-------------------------------------------- report the result */\n\treturn c;\n}\n\nuint32_t RSHash(cuc *str, uint32_t len) {\n\tuint32_t b = 378551;\n\tuint32_t a = 63689;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = hash * a + str[i];\n\t\ta = a * b;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t JSHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 1315423911;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash ^= ((hash << 5) + str[i] + (hash >> 2));\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t PJWHash(cuc *str, uint32_t len) {\n\tuint64_t BitsInUnsignedInt = (uint64_t)32;\n\tuint64_t ThreeQuarters = (uint64_t)((BitsInUnsignedInt * 3) / 4);\n\tuint64_t OneEighth = (uint64_t)(BitsInUnsignedInt / 8);\n\tuint64_t HighBits = (uint64_t)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);\n\tuint64_t hash = 0;\n\tuint64_t test = 0;\n\tfor (uint32_t i = 0; i < len; i++) {\n\t\thash = (hash << OneEighth) + str[i];\n\t\tif ((test = hash & HighBits) != 0) {\n\t\t\thash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t ELFHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tuint64_t x = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash << 4) + str[i];\n\t\tif ((x = hash & 0xF0000000L) != 0) {\n\t\t\thash ^= (x >> 24);\n\t\t}\n\t\thash &= ~x;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t BKDRHash(cuc *str, uint32_t len) {\n\tuint64_t seed = 131;\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = (hash * seed) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t SDBMHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = str[i] + (hash << 6) + (hash << 16) - hash;\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DJBHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 5381;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) + hash) + str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t DEKHash(cuc *str, uint32_t len) {\n\tuint64_t hash = len;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\thash = ((hash << 5) ^ (hash << 27)) ^ str[i];\n\t}\n\treturn (uint32_t)hash;\n}\n\nuint32_t APHash(cuc *str, uint32_t len) {\n\tuint64_t hash = 0xAAAAAAAA;\n\tfor (uint32_t i = 0; i < len; ++i) {\n\t\tif ((i & 1) == 0) {\n\t\t\thash ^= ((hash << 7) ^ str[i] * (hash >> 3));\n\t\t}\n\t\telse {\n\t\t\thash ^= (~(((hash << 11) + str[i]) ^ (hash >> 5)));\n\t\t}\n\t}\n\treturn (uint32_t)hash;\n}\n\nunsigned int(*HashFunction::hash_function[])(const unsigned char *, unsigned int) = {\n\t&BOB1, &BOB2, &BOB3, &BOB4,\n\t&RSHash, &JSHash,\n\t&PJWHash, &ELFHash,\n\t&BKDRHash, &SDBMHash,\n\t&DJBHash, &DEKHash,\n\t&APHash, &DJBHash\n};\n\nHashFunction::HashFunction() {\n\t/*\n\thash_function[0] = &BOB1;\n\thash_function[1] = &BOB2;\n\thash_function[2] = &BOB3;\n\thash_function[3] = &BOB4;\n\thash_function[4] = &RSHash;\n\thash_function[5] = &JSHash;\n\thash_function[6] = &PJWHash;\n\thash_function[7] = &ELFHash;\n\thash_function[8] = &BKDRHash;\n\thash_function[9] = &SDBMHash;\n\thash_function[10] = &DJBHash;\n\thash_function[11] = &DEKHash;\n\thash_function[12] = &APHash;\n\thash_function[13] = &DJBHash;\n\t*/\n}\n\nuint32_t HashFunction::Str2Int(cuc *str, uint32_t hidx, uint32_t len) {\n\treturn hash_function[hidx](str, len);\n}\n\n#endif //_PARAMS_H\n\n#ifndef _SPA_H\n#define _SPA_H\nusing namespace std;\n\nclass SPA\n{\npublic:\n\tvirtual void insert(unsigned int key, int f) = 0;\n};\n\n#endif // _SPA_H\nusing std::min;\nusing std::swap;\n\n#define SQR(X) (X) * (X)\n#ifndef myCountSketch_H\n#define myCountSketch_H\n\ntemplate<uint8_t univ_key_len, int capacity, int d = 3>\nstruct myCountSketch {\npublic:\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK heap[capacity];\n\tint heap_element_num;\n\tint mem_in_bytes;\n\tint w;\n\tint * cm_sketch[d];\n\tBOBHash * hash[d];\n\tBOBHash * hash_polar[d];\n\tunordered_map<string, uint32_t> ht;\n\n\tdouble get_f2()\n\t{\n\t\tdouble res[d];\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdouble est = 0;\n\t\t\tfor (int j = 0; j < w; ++j) {\n\t\t\t\test += SQR(double(cm_sketch[i][j]));\n\t\t\t}\n\t\t\tres[i] = est;\n\t\t}\n\n\t\tsort(res, res + d);\n\t\tif (d % 2) {\n\t\t\treturn res[d / 2];\n\t\t}\n\t\telse {\n\t\t\treturn (res[d / 2] + res[d / 2 - 1]) / 2;\n\t\t}\n\t}\n\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\n\t//public:\n\tstring name;\n\n\tmyCountSketch(int mem_in_bytes_) : mem_in_bytes(mem_in_bytes_), heap_element_num(0) {\n\t\t// memset(heap, 0, sizeof(heap));\n\t\tw = mem_in_bytes / 4 / d;\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\tmemset(cm_sketch, 0, sizeof(cm_sketch));\n\t\tsrand(time(0));\n\t\tfor (int i = 0; i < d; i++) {\n\t\t\thash[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\thash_polar[i] = new BOBHash(uint32_t(rand() % MAX_PRIME32));\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t}\n\n\t\tstringstream name_buf;\n\t\tname_buf << \"myCountSketch@\" << mem_in_bytes;\n\t\tname = name_buf.str();\n\t}\n\n\tvoid insert(const char * key) {\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(key, univ_key_len) % w;\n\t\t\tint polar = hash_polar[i]->Run(key, univ_key_len) % 2;\n\n\t\t\tcm_sketch[i][idx] += polar ? 1 : -1;\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t}\n\n\t// void get_top_k(uint16_t k, uint32_t * result) {\n\t// VK * a = new VK[capacity];\n\t// memcpy(a, heap, sizeof(heap));\n\t// sort(a, a + capacity);\n\t// int i;\n\t// for (i = 0; i < k && i < capacity; ++i) {\n\t// result[i] = a[capacity - 1 - i].second;\n\t// }\n\t// for (; i < k; ++i) {\n\t// result[i] = 0;\n\t// }\n\t// }\n\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint ans[d];\n\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tint idx = hash[i]->Run(str, len) % w;\n\t\t\tint polar = hash_polar[i]->Run(str, len) % 2;\n\n\n\t\t\tint val = cm_sketch[i][idx];\n\n\t\t\tans[i] = polar ? val : -val;\n\t\t}\n\t\tsort(ans, ans + d);\n\t\tint tmin;\n\t\tif (d % 2 == 0) {\n\t\t\ttmin = (ans[d / 2] + ans[d / 2 - 1]) / 2;\n\t\t}\n\t\telse {\n\t\t\ttmin = ans[d / 2];\n\t\t}\n\t\ttmin = (tmin <= 1) ? 1 : tmin;\n\t\treturn tmin;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid get_top_k_with_frequency(uint16_t k, vector<KV> & result) {\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\tresult[i].first = a[capacity - 1 - i].second;\n\t\t\tresult[i].second = a[capacity - 1 - i].first;\n\t\t}\n\t\tfor (; i < k; ++i) {\n\t\t\tresult[i].second = 0;\n\t\t}\n\t}\n\n\tvoid get_l2_heavy_hitters(double alpha, vector<KV> & result)\n\t{\n\t\tget_top_k_with_frequency(capacity, result);\n\t\tdouble f2 = get_f2();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (SQR(double(result[i].second)) < alpha * f2) {\n\t\t\t\tresult.resize(i);\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid get_heavy_hitters(uint32_t threshold, std::vector<pair<string, uint32_t> >& ret)\n\t{\n\t\tret.clear();\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\tif (heap[i].first >= threshold) {\n\t\t\t\tret.emplace_back(make_pair(heap[i].second, heap[i].first));\n\t\t\t}\n\t\t}\n\t}\n\n\t~myCountSketch() {\n\t\tfor (int i = 0; i < d; ++i) {\n\t\t\tdelete hash[i];\n\t\t\tdelete hash_polar[i];\n\t\t\tdelete cm_sketch[i];\n\t\t}\n\t\treturn;\n\t}\n};\n#endif // ! myCountSketch_H\n\n\n/*----optional according to your need----*/\n\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ASketch_Count: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint tot_memory_in_bytes,filter_size,capacity;//parameter\n\tint w;\n\tint bucket_num;\n\n\tint *new_count;\n\tint *old_count;\n\tuint32_t *items;\n\n\tint cur_pos;\n\tint d = 3;\n\tmyCountSketch<4,1000,3> * sketch;\n\t\n\n\tBOBHash *bobhash[MAX_HASH_NUM];\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ASketch_Count()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ASketch_Count\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"tot_memory_in_bytes\")\n {\n\n\t\t\ttot_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_size\")\n {\n\t\t\tfilter_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tw = tot_memory_in_bytes - filter_size * 12;\n\t\tsketch = new myCountSketch<4,1000, 3>(w);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\t\t\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tsrand(time(0));\n\t\t\tbobhash[i]->SetSeed(uint32_t(rand() % MAX_PRIME32));\n\t\t}\n /*----optional according to your need----*/\n }\n\tuint32_t * get_items()\n\t{\n\t\treturn items;\n\t}\n\n\tint * get_freq()\n\t{\n\t\treturn new_count;\n\t}\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n /*----optional according to your need----*/\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\tnew_count[matched_index] += 1;\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t\tif (cur_pos != filter_size)\n\t\t{\n\t\t\titems[cur_pos] = *key;\n\t\t\tnew_count[cur_pos] = 1;\n\t\t\told_count[cur_pos] = 0;\n\t\t\tcur_pos++;\n\t\t\treturn;\n\t\t}\n\t\tint estimate_value, min_index, min_value, temp;\n\t\tsketch->insert(str);\n\t\testimate_value = sketch->frequencyQuery(str, len);\n\t\tmin_index = 0;\n\t\tmin_value = (1 << 30);\n\t\tfor (int i = 0; i < filter_size; i++)\n\t\t{\n\t\t\tif (items[i] != (uint32_t)(-1) && min_value > new_count[i])\n\t\t\t{\n\t\t\t\tmin_value = new_count[i];\n\t\t\t\tmin_index = i;\n\t\t\t}\n\t\t}\n\t\tif (estimate_value > min_value)\n\t\t{\n\t\t\ttemp = new_count[min_index] - old_count[min_index];\n\t\t\tfor(int i =0;i<temp;++i)\n\t\t\t\tsketch->insert(str);\n\t\t\titems[min_index] = *key;\n\t\t\tnew_count[min_index] = estimate_value;\n\t\t\told_count[min_index] = estimate_value;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint * key = new int;\n\t\tmemcpy(key, str, 4);\n\t\tconst __m128i item = _mm_set1_epi32(*key);\n\t\tfor (int i = 0; i < bucket_num; i++)\n\t\t{\n\t\t\t__m128i *keys_p = (__m128i *)(items + (i << 4));\n\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tint matched = _mm_movemask_epi8(a_comp);\n\n\t\t\tif (matched != 0)\n\t\t\t{\n\t\t\t\t//return 32 if input is zero;\n\t\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched) + (i << 4);\n\t\t\t\treturn new_count[matched_index];\n\t\t\t}\n\t\t}\n\t\treturn sketch->frequencyQuery(str, len);\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\t\t\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tsketch = new myCountSketch<4,1000, 3>(w);\n\t\tbucket_num = filter_size / 16;\n\t\tnew_count = new int[filter_size];\n\t\told_count = new int[filter_size];\n\t\titems = new uint32_t[filter_size];\n\n\t\tmemset(items, 0, sizeof(items));\n\t\tmemset(new_count, 0, sizeof(new_count));\n\t\tmemset(old_count, 0, sizeof(old_count));\n\t\tcur_pos = 0;\n /*----optional according to your need----*/\n }\n ~ASketch_Count()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\t\n\t\tdelete[] new_count;\n\t\tdelete[] old_count;\n\t\tdelete[] items;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ASketch_Count);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/ASketch_Count.h"}, {"tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "name": "LossyCounting", "params": [{"field": "window_size", "help": "", "type": "int"}], "code": "//implemention based MASketch \n#ifndef LossyCounting_H //must change this MACRO\n#define LossyCounting_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n#define DEBUG\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\nusing std::min;\ntypedef struct counter\n{\n\tstring item;\n\tint count;\n} Counter;\n\ntypedef struct LC_type\n{\n\tCounter *bucket;\n\tCounter *holder;\n\tCounter *newcount;\n\tint buckets;\n\tint holdersize;\n\tint maxholder;\n\tint window;\n\tint epoch;\n} LC_type;\n\nbool LossyCountcmp(const pair<string, uint32_t> a, const pair<string, uint32_t> b) {\n\treturn a.second > b.second;\n}\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass LossyCounting: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint window_size;//parameter\n\n\tLC_type * result;\n /*----optional according to your need----*/\n\tvoid countershell(int n, Counter a[])\n\t{\n\t\tint i, j, inc;\n\t\tCounter v;\n\t\tinc = 1;\n\t\tdo\n\t\t{\n\t\t\tinc *= 3;\n\t\t\tinc++;\n\t\t} while (inc <= n);\n\t\tdo\n\t\t{\n\t\t\tinc /= 3;\n\t\t\tfor (i = inc + 1; i <= n; i++)\n\t\t\t{\n\t\t\t\tv = a[i - 1];\n\t\t\t\tj = i;\n\t\t\t\twhile (a[j - inc - 1].item > v.item)\n\t\t\t\t{\n\t\t\t\t\ta[j - 1] = a[j - inc - 1];\n\t\t\t\t\tj -= inc;\n\t\t\t\t\tif (j <= inc) break;\n\t\t\t\t}\n\t\t\t\ta[j - 1] = v;\n\t\t\t}\n\t\t} while (inc > 1);\n\t}\n\tint countermerge(Counter *newcount, Counter *left, Counter *right,\n\t\tint l, int r, int maxholder)\n\t{ // merge up two lists of counters. returns the size of the lists. \n\t\tint i, j, m;\n\n\t\tif (l + r>maxholder)\n\t\t{ // a more advanced implementation would do a realloc here...\n\t\t\tprintf(\"Out of memory -- trying to allocate %d counters\\n\", l + r);\n\t\t\texit(1);\n\t\t}\n\t\ti = 0;\n\t\tj = 0;\n\t\tm = 0;\n\n\t\twhile (i<l && j<r)\n\t\t{ // merge two lists\n\t\t\tif (left[i].item == right[j].item)\n\t\t\t{ // sum the counts of identical items\n\t\t\t\tnewcount[m].item = left[i].item;\n\t\t\t\tnewcount[m].count = right[j].count;\n\t\t\t\twhile (left[i].item == right[j].item)\n\t\t\t\t{\n\t\t\t\t\tnewcount[m].count += left[i].count;\n\t\t\t\t\ti++;\n\t\t\t\t}\n\t\t\t\tj++;\n\t\t\t}\n\t\t\telse if (left[i].item<right[j].item)\n\t\t\t{ // else take the left item, creating counts appropriately\n\t\t\t\tnewcount[m].item = left[i].item;\n\t\t\t\tnewcount[m].count = 0;\n\t\t\t\twhile (left[i].item == newcount[m].item)\n\t\t\t\t{\n\t\t\t\t\tnewcount[m].count += left[i].count;\n\t\t\t\t\ti++;\n\t\t\t\t}\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tnewcount[m].item = right[j].item;\n\t\t\t\tnewcount[m].count = right[j].count;\n\t\t\t\tj++;\n\t\t\t}\n\t\t\tnewcount[m].count--;\n\t\t\tif (newcount[m].count>0) m++;\n\t\t\telse\n\t\t\t{ // adjust for items which may have negative or zero counts\n\t\t\t\tnewcount[m].item = -1;\n\t\t\t\tnewcount[m].count = 0;\n\t\t\t}\n\t\t}\n\n\t\t// now that the main part of the merging has been done\n\t\t// need to copy over what remains of whichever list is not used up\n\n\t\tif (j<r)\n\t\t{\n\t\t\twhile (j<r)\n\t\t\t{\n\t\t\t\tif (right[j].count > 1)\n\t\t\t\t{\n\t\t\t\t\tnewcount[m].item = right[j].item;\n\t\t\t\t\tnewcount[m].count = right[j].count - 1;\n\t\t\t\t\tm++;\n\t\t\t\t}\n\t\t\t\tj++;\n\t\t\t}\n\t\t}\n\t\telse\n\t\t\tif (i<l)\n\t\t\t{\n\t\t\t\twhile (i<l)\n\t\t\t\t{\n\t\t\t\t\tnewcount[m].item = left[i].item;\n\t\t\t\t\tnewcount[m].count = -1;\n\t\t\t\t\twhile ((newcount[m].item == left[i].item) && (i<l))\n\t\t\t\t\t{\n\t\t\t\t\t\tnewcount[m].count += left[i].count;\n\t\t\t\t\t\ti++;\n\t\t\t\t\t}\n\t\t\t\t\tif (newcount[m].count>0)\n\t\t\t\t\t\tm++;\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tnewcount[m].item = -1;\n\t\t\t\t\t\tnewcount[m].count = 0;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\treturn(m);\n\t}\n\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n LossyCounting()\n {\n /*constructed function MUST BE non-parameter!!!*/\n sketch_name = \"LossyCounting\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"window_size\")\n {\n\n window_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tresult = (LC_type *)calloc(1, sizeof(LC_type));\n\t\tresult->buckets = 0;\n\t\tresult->holdersize = 0;\n\t\tresult->epoch = 0;\n\t\tresult->window = window_size;\n\t\tresult->maxholder = result->window * 4;\n\t\tresult->bucket = (Counter*)calloc(result->window + 2, sizeof(Counter));\n\t\tresult->holder = (Counter*)calloc(result->maxholder, sizeof(Counter));\n\t\tresult->newcount = (Counter*)calloc(result->maxholder, sizeof(Counter));\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tCounter *tmp;\n\t\t// interpret a negative item identifier as a removal\n\t\tresult->bucket[result->buckets].item = string(str,len);\n\t\tresult->bucket[result->buckets].count = 1;\n\t\tif (result->buckets == result->window)\n\t\t{\n\t\t\tcountershell(result->window, result->bucket);\n\t\t\tresult->holdersize = countermerge(result->newcount, result->bucket, result->holder,\n\t\t\t\tresult->window, result->holdersize, result->maxholder);\n\t\t\ttmp = result->newcount;\n\t\t\tresult->newcount = result->holder;\n\t\t\tresult->holder = tmp;\n\t\t\tresult->buckets = 0;\n\t\t\tresult->epoch++;\n\t\t}\n\t\telse\n\t\t\tresult->buckets++;\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n return 0;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tint i, point=0;\n\t\n\t\n\t\t// should do a countermerge here.\n\t\tcountershell(result->buckets, result->bucket);\n\t\tresult->holdersize = countermerge(result->newcount, result->bucket, result->holder,\n\t\t\tresult->buckets, result->holdersize, result->maxholder);\n\t\n\t\tauto swap_tmp = result->newcount;\n\t\tresult->newcount = result->holder;\n\t\tresult->holder = swap_tmp;\n\t\tresult->buckets = 0;\n\t\tvector<pair<string, uint32_t> >tmp;\n\t\tfor (i = 0; i<result->holdersize; i++)\n\t\t{\n\t\t\ttmp.emplace_back(make_pair(result->holder[i].item, result->holder[i].count + result->epoch));\n\t\t}\n\t\tsort(tmp.begin(), tmp.end(),LossyCountcmp);\n\t\t\n\t\tcout << k << endl;\n\t\tcout << result->holdersize << endl;\n\t\tfor (int i = 0; i < min(k,result->holdersize); i++)\n\t\t{\n\t\t\ttopkItem.push_back(tmp[i]);\n\t\t}\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tfree(result->bucket);\n\t\tfree(result->holder);\n\t\tfree(result->newcount);\n\t\tfree(result);\n\t\tresult = (LC_type *)calloc(1, sizeof(LC_type));\n\t\tresult->buckets = 0;\n\t\tresult->holdersize = 0;\n\t\tresult->epoch = 0;\n\t\tresult->window = (int) 1.0 / window_size;\n\t\tresult->maxholder = result->window * 4;\n\t\tresult->bucket = (Counter*)calloc(result->window + 2, sizeof(Counter));\n\t\tresult->holder = (Counter*)calloc(result->maxholder, sizeof(Counter));\n\t\tresult->newcount = (Counter*)calloc(result->maxholder, sizeof(Counter));\n /*----optional according to your need----*/\n }\n ~LossyCounting()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tfree(result->bucket);\n\t\tfree(result->holder);\n\t\tfree(result->newcount);\n\t\tfree(result);\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(LossyCounting);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/LossyCounting.h"}, {"path": "skbm/new_sketch/sketch/CM.h", "name": "CM", "params": [{"field": "memory_in_bytes", "help": "", "type": "int"}, {"field": "hash_num", "help": "", "type": "int"}], "code": "#ifndef CM_SKETCH_H\n#define CM_SKETCH_H\n#include \"SketchBase.h\"\n#include \"factor.h\"\n#include \"../hash/hashfunction.h\"\n#include<string>\n#include<iostream>\n#include<memory.h>\n/*\nint is type of frequencyQuery Return\nhashfunction type:BOBHash\\\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\nclass CM: public SketchBase {\nprivate:\n int hash_num;//parameter\n\tint memory_in_bytes;//parameter\n\tint w = 0;\n BOBHash * hash;\n int **data;\npublic:\n using SketchBase::sketch_name;\n CM()\n {\n sketch_name = \"CM\";\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n\n if (parameterName==\"hash_num\")\n {\n\n hash_num = parameterValue;\n return;\n }\n if (parameterName==\"memory_in_bytes\")\n {\n\t\t\tmemory_in_bytes = parameterValue;\n return;\n }\n }\n void init()\n {\n\t\tw = memory_in_bytes / 4 / hash_num;\n\t\tdata = new int*[hash_num];\n hash = new BOBHash[hash_num];\n for (int i = 0; i<hash_num; ++i)\n {\n data[i] = new int[w];\n memset(data[i],0,sizeof(int)*w);\n hash[i].SetSeed(i+750);\n }\n\n }\n void Insert(const char * str, const int & len)\n {\n for (int i = 0; i < hash_num; ++i)\n {\n ++data[i][hash[i].Run(str, len) % w];\n }\n }\n int frequencyQuery(const char * str, const int & len)\n {\n int res = data[0][hash[0].Run(str, len) % w];\n for (int i = 1; i < hash_num; ++i) {\n int t = data[i][hash[i].Run(str, len) % w];\n res = res < t ? res : t;\n }\n return res;\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n return topkItem;\n }\n void reset()\n {\n for (int i; i<hash_num; ++i)\n {\n memset(data[i],0,sizeof(int)*w);\n }\n }\n ~CM()\n {\n for (int i; i<hash_num; ++i)\n {\n delete [] data[i];\n }\n delete [] data;\n }\n};\nREGISTER(CM);\n#endif", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}]}, {"params": [{"field": "word_num", "help": "", "type": "int"}, {"field": "d", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "name": "Pyramid_CM_CU_Heap", "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_CM_CU_Heap_H //must change this MACRO\n#define Pyramid_CM_CU_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_CM_CU_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size,capacity;//parameter\n\n\tuint64 *counter[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_CM_CU_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\theap_element_num = 0;\n\n\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\n\t\tif (min_value != 15)\n\t\t{\n\t\t\tfor (int i = 0; i < d; i++)\n\t\t\t{\n\t\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\t\tcounter[0][my_word_index] += value[i] == min_value ? ((uint64)0x1 << (counter_offset[i] << 2)) : 0;\n\n\t\t\t}\n\t\t\t\n\t\t}\n\t\telse \n\t\t{\n\t\t\tfor (int i = 0; i < d; i++)\n\t\t\t{\n\t\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\n\t\t\t\tif (value[i] == 0)\n\t\t\t\t\tcontinue;\n\n\t\t\t\tcounter[0][my_word_index] &= (~((uint64)0xF << (counter_offset[i] << 2)));\n\t\t\t\tcarry(index[i]);\n\t\t\t}\n\t\t}\n\t\tint tmin = frequencyQuery(str, len);\n\t\tstring str_key = string(str, len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t\treturn;\n\n\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tvalue[i] += get_value(index[i]);\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\t\treturn min_value;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\t\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right, up_left_or_right;\n\n\t\tint value, up_value;\n\t\tint word_index = index >> 4, up_word_index;\n\t\tint offset = index & 0xF;\n\t\tint up_offset = offset;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tup_word_index = (word_index >> 1);\n\t\t\tup_left_or_right = up_word_index & 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) >= 2)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse if ((value & 3) == 1)\n\t\t\t{\n\t\t\t\tup_value = (counter[i + 1][up_word_index] >> (up_offset << 2)) & 0xF;\n\n\t\t\t\t//change this layer's flag bit;\n\t\t\t\tif (((up_value >> (2 + up_left_or_right)) & 1) == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x1 << (2 + left_or_right + (offset << 2))));\n\t\t\t\t}\n\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[i][word_index] |= ((uint64)0x3 << (offset << 2));\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) != 3)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\thigh_value += ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_CM_CU_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_CM_CU_Heap);\n#endif//DO NOT change this file", "path": "skbm/new_sketch/sketch/Pyramid_CM_CU_Heap.h"}, {"path": "skbm/new_sketch/sketch/Pyramid_CM_Heap.h", "name": "Pyramid_CM_Heap", "params": [{"field": "word_num", "help": "", "type": "int"}, {"field": "d", "help": "", "type": "int"}, {"field": "word_size", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef Pyramid_CM_Heap_H //must change this MACRO\n#define Pyramid_CM_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <algorithm>\n#include <cstring>\n#include <string.h>\n#include <math.h>\n\n#ifndef _PyramidPARAMS_H\n#define _PyramidPARAMS_H\n\n#define MAX_INSERT_PACKAGE 1000000\n\n#define MAX_HASH_NUM_P 20\n\n#define FILTER_SIZE 32\n\n#define COUNTER_SIZE 16\n\n#define LOW_HASH_NUM 4\n\ntypedef long long lint;\ntypedef unsigned int uint;\ntypedef unsigned long long int uint64;\n#endif //_PARAMS_H\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass Pyramid_CM_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint word_num,d,word_size,capacity;//parameter\n\n\tuint64 *counter[60];\n\tint word_index_size, counter_index_size;\n\tint counter_num;\n\tBOBHash * bobhash[MAX_HASH_NUM_P];\n\t//word_num is the number of words in the first level.\n\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tunordered_map<string, uint32_t> ht;\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n Pyramid_CM_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"Pyramid_CM_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"word_num\")\n {\n\n\t\t\tword_num = parameterValue;\n return;\n }\n if (parameterName==\"d\")\n {\n d = parameterValue;\n return;\n }\n if (parameterName==\"word_size\")\n {\n\t\t\tword_size = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\theap_element_num = 0;\n\n\n\t\t//for calculating the four hash value constrained in one certain word;\n\t\tword_index_size = 18;\n\n\t\tcounter_index_size = (int)(log(word_size) / log(2)) - 2;//4-8->16-256 counters in one word;\n\t\tcounter_num = (word_num << counter_index_size);\n\n\n\t\tfor (int i = 0; i < 15; i++)\n\t\t{\n\t\t\tcounter[i] = new uint64[word_num >> i];\n\t\t\tmemset(counter[i], 0, sizeof(uint64) * (word_num >> i));\n\t\t}\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tbobhash[i] = new BOBHash;\n\t\t\tbobhash[i]->SetSeed(i + 1000);\n\t\t}\n\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\t\t\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tint flag = 0xFFFF;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\n\t\t\tif (((flag >> counter_offset[i]) & 1) == 0)\n\t\t\t\tcontinue;\n\n\t\t\tflag &= (~(1 << counter_offset[i]));\n\n\t\t\tif (value[i] == 15)\n\t\t\t{\n\t\t\t\tcounter[0][my_word_index] &= (~((uint64)0xF << (counter_offset[i] << 2)));\n\t\t\t\tcarry(index[i]);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[0][my_word_index] += ((uint64)0x1 << (counter_offset[i] << 2));\n\t\t\t}\n\t\t}\n\t\t\n\t\t\tint tmin = frequencyQuery(str, len);\n\t\t\tstring str_key = string(str, len);\n\t\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\t\theap[ht[str_key]].first++;\n\t\t\t\theap_adjust_down(ht[str_key]);\n\t\t\t}\n\t\t\telse if (heap_element_num < capacity) {\n\t\t\t\theap[heap_element_num].second = str_key;\n\t\t\t\theap[heap_element_num].first = tmin;\n\t\t\t\tht[str_key] = heap_element_num++;\n\t\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t\t}\n\t\t\telse if (tmin > heap[0].first) {\n\t\t\t\tVK & kv = heap[0];\n\t\t\t\tht.erase(kv.second);\n\t\t\t\tkv.second = str_key;\n\t\t\t\tkv.first = tmin;\n\t\t\t\tht[str_key] = 0;\n\t\t\t\theap_adjust_down(0);\n\t\t\t}\n\t\t\treturn;\n\t\t\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint min_value = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM];\n\t\tint index[MAX_HASH_NUM];\n\t\tint counter_offset[MAX_HASH_NUM];\n\n\t\tuint64 hash_value = (bobhash[0]->Run(str, len));\n\t\tint my_word_index = (hash_value & ((1 << word_index_size) - 1)) % word_num;\n\t\thash_value >>= word_index_size;\n\n\t\tfor (int i = 0; i < d; i++)\n\t\t{\n\t\t\tcounter_offset[i] = (hash_value & 0xFFF) % (1 << counter_index_size);\n\t\t\tindex[i] = ((my_word_index << counter_index_size) + counter_offset[i]) % counter_num;\n\t\t\thash_value >>= counter_index_size;\n\n\t\t\tvalue[i] = (counter[0][my_word_index] >> (counter_offset[i] << 2)) & 0xF;\n\t\t\tvalue[i] += get_value(index[i]);\n\t\t\tmin_value = value[i] < min_value ? value[i] : min_value;\n\t\t}\n\n\n\t\treturn min_value;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\n\t\t}\n\t\treturn topkItem;\n /*----optional according to your need----*/\n }\n\tvoid down_carry(int index)\n\t{\n\t\tint left_or_right, up_left_or_right;\n\n\t\tint value, up_value;\n\t\tint word_index = index >> 4, up_word_index;\n\t\tint offset = index & 0xF;\n\t\tint up_offset = offset;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tup_word_index = (word_index >> 1);\n\t\t\tup_left_or_right = up_word_index & 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) >= 2)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse if ((value & 3) == 1)\n\t\t\t{\n\t\t\t\tup_value = (counter[i + 1][up_word_index] >> (up_offset << 2)) & 0xF;\n\n\t\t\t\t//change this layer's flag bit;\n\t\t\t\tif (((up_value >> (2 + up_left_or_right)) & 1) == 0)\n\t\t\t\t{\n\t\t\t\t\tcounter[i][word_index] &= (~((uint64)0x1 << (2 + left_or_right + (offset << 2))));\n\t\t\t\t}\n\n\t\t\t\tcounter[i][word_index] -= ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tcounter[i][word_index] |= ((uint64)0x3 << (offset << 2));\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid carry(int index)\n\t{\n\t\tint left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tword_index >>= 1;\n\n\t\t\tcounter[i][word_index] |= ((uint64)0x1 << (2 + left_or_right + (offset << 2)));\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif ((value & 3) != 3)\n\t\t\t{\n\t\t\t\tcounter[i][word_index] += ((uint64)0x1 << (offset << 2));\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tcounter[i][word_index] &= (~((uint64)0x3 << (offset << 2)));\n\t\t}\n\t}\n\n\tint get_value(int index)\n\t{\n\t\tint left_or_right;\n\t\tint anti_left_or_right;\n\n\t\tint value;\n\t\tint word_index = index >> 4;\n\t\tint offset = index & 0xF;\n\n\n\t\tint high_value = 0;\n\n\t\tfor (int i = 1; i < 15; i++)\n\t\t{\n\n\t\t\tleft_or_right = word_index & 1;\n\t\t\tanti_left_or_right = (left_or_right ^ 1);\n\n\t\t\tword_index >>= 1;\n\n\t\t\tvalue = (counter[i][word_index] >> (offset << 2)) & 0xF;\n\n\t\t\tif (((value >> (2 + left_or_right)) & 1) == 0)\n\t\t\t\treturn high_value;\n\n\t\t\thigh_value += ((value & 3) - ((value >> (2 + anti_left_or_right)) & 1)) * (1 << (2 + 2 * i));\n\t\t}\n\t}\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n ~Pyramid_CM_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n /*----optional You can add your function----*/\n};\nREGISTER(Pyramid_CM_Heap);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/HeavyGuardian.h", "name": "HeavyGuardian", "params": [{"field": "cell_num", "help": "", "type": "int"}, {"field": "lightcell_num", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "threshold", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef HeavyGuardian_H //must change this MACRO\n#define HeavyGuardian_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include thiss\n\n/*----optional according to your need----*/\n#include <cmath>\n#include <cstdio>\n#include <cstdlib>\n#include <iostream>\n#include <algorithm>\n#include <string>\n#include <cstring>\n#define G 8\n#define HK_b 1.08\n#define str_len 4\nusing namespace std;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\nbool ansCMP(std::pair<string, int> a, std::pair<string, int> b)\n{\n\treturn a.second > b.second;\n}\nclass HeavyGuardian: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tstruct node { int C; unsigned int FP; } HK[1000005][G + 2];\n\tint HL[1000005][16];\n\n\tBOBHash * bobhash;//optional DSAB-builtin hashfunction\n\tBOBHash * lighthash;//optional DSAB-builtin hashfunction\n\tint bucket_num;//parameter\n\tint threshold;//parameter\n\tint cell_num;//parameter\n\tint lightcell_num;//parameter\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n\tint cnt;\n\tstring ans[1000005];\n\tvoid ADD(string x) { ans[++cnt] = x; }\n HeavyGuardian()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"HeavyGuardian\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"cell_num\")\n {\n\n cell_num = parameterValue;\n return;\n }\n\t\tif (parameterName == \"lightcell_num\")\n\t\t{\n\n\t\t\tlightcell_num = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"bucket_num\")\n {\n\t\t\tbucket_num = parameterValue;\n return;\n }\n if (parameterName==\"threshold\")\n {\n\t\t\t threshold = parameterValue;\n return;\n }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tcnt = 0; \n\t\tbobhash = new BOBHash();\n\t\tlighthash = new BOBHash();\n\t\tlighthash->SetSeed(750);\n\t\tbobhash->SetSeed(1001);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tunsigned int H = bobhash->Run(str, len);\n\t\tunsigned int FP = (H >> 16), Hsh = H % bucket_num;\n\t\tbool FLAG = false;\n\t\tfor (int k = 0; k<cell_num; k++)\n\t\t{\n\t\t\tint c = HK[Hsh][k].C;\n\t\t\tif (HK[Hsh][k].FP == FP)\n\t\t\t{\n\t\t\t\tHK[Hsh][k].C++;\n\t\t\t\tif (HK[Hsh][k].C == threshold) ADD(string(str,len));\n\t\t\t\tFLAG = true;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\tif (!FLAG)\n\t\t{\n\t\t\tint X, MIN = 1000000000;\n\t\t\tfor (int k = 0; k<cell_num; k++)\n\t\t\t{\n\t\t\t\tint c = HK[Hsh][k].C;\n\t\t\t\tif (c<MIN) { MIN = c; X = k; }\n\t\t\t}\n\t\t\tif (!(rand() % int(pow(HK_b, HK[Hsh][X].C))))\n\t\t\t{\n\t\t\t\tHK[Hsh][X].C--;\n\t\t\t\tif (HK[Hsh][X].C <= 0)\n\t\t\t\t{\n\t\t\t\t\tHK[Hsh][X].FP = FP;\n\t\t\t\t\tHK[Hsh][X].C = 1;\n\t\t\t\t\tFLAG = true;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\tif (!FLAG)\n\t\t{\n\t\t unsigned int pos = lighthash->Run(str, len) % lightcell_num;\n\t\t HL[Hsh][pos] += 1;\n\t\t}\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tunsigned int H = bobhash->Run(str, len);\n\t\tunsigned int FP = (H >> 16), Hsh = H % bucket_num;\n\t\tfor (int k = 0; k<cell_num; k++)\n\t\t{\n\t\t\tint c = HK[Hsh][k].C;\n\t\t\tif (HK[Hsh][k].FP == FP) return HK[Hsh][k].C;\n\t\t}\n\t\tunsigned int pos = lighthash->Run(str, len) % lightcell_num;\n\t\treturn HL[Hsh][pos];\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\tstd::vector<std::pair <std::string, int> > ansItem;\n\t\tfor (int i = 1; i <= cnt; ++i)\n\t\t{\n\t\t\tansItem.push_back(make_pair(ans[i], frequencyQuery(ans[i].c_str(),str_len)));\n\t\t}\n\t\tsort(ansItem.begin(), ansItem.end(), ansCMP);\n\t\tint t = cnt > k ? k : cnt;\n\t\tfor (int i = 0; i < t; ++i)\n\t\t{\n\t\t\ttopkItem.push_back(ansItem[i]);\n\t\t}\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n\t\tcnt = 0;\n\t\tmemset(HK, 0, sizeof(HK));\n /*----optional according to your need----*/\n }\n ~HeavyGuardian()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(HeavyGuardian);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/CM_Heap.h", "name": "CM_Heap", "params": [{"field": "hash_num", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}, {"field": "memory_in_bytes", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef CMHEAP_H //must change this MACRO\n#define CMHEAP_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\" //If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <algorithm>\n#include <sstream>\nusing std::min;\nusing std::swap;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\ndefine: e.g. BOBHash myhash\nsetseed: e.g. myhash.SetSeed(1001)\ncalculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE: \ndefine: cuckoo::CuckooHashing<key_len, capacity> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase() = 0;\nvirtual void parameterSet(const string& parameterName, double parameterValue)=0;\nvirtual init() = 0;\nvirtual void Insert(const char *str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > frequencyQuery(const char *str, const int & len) = 0;\nvirtual vector<string> topkQuery(const int & k) = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\nbool CMHcmp(const pair<string, uint32_t> a, const pair<string, uint32_t> b) {\n\treturn a.second > b.second;\n}\nclass CM_Heap : public SketchBase {\nprivate:\n\t\n\t/*----optional according to your need----*/\n\tint hash_num;//parameter\n\tint memory_in_bytes;//parameter\n\tint capacity;//parameter\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\n\n\tint w;\n\tint ** cm_sketch;\n\tBOBHash * hash;\n\t//cuckoo::CuckooHashing<4> ht;\n\tunordered_map<string, uint32_t> ht;\n\t/*----optional according to your need----*/\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\npublic:\n\tusing SketchBase::sketch_name;//DO NOT change this declaration\n\tCM_Heap()\n\t{\n\t\t/*constructed function MUST BT non-parameter!!!*/\n\t\tsketch_name = \"CM_Heap\";//please keep sketch_name the same as class name and .h file name\n\t}\n\tvoid parameterSet(const std::string& parameterName, double parameterValue)\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\tif (parameterName == \"hash_num\")\n\t\t{\n\n\t\t\thash_num = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\tif (parameterName == \"memory_in_bytes\")\n\t\t{\n\t\t\tmemory_in_bytes = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid init()\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\theap_element_num = 0;\n\t\tint sketchMem = memory_in_bytes - capacity * 8;\n\t\tw = sketchMem / 4 / hash_num;\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\theap_element_num = 0;\n\t\tcm_sketch = new int*[hash_num];\n\t\thash = new BOBHash[hash_num];\n\t\tfor (int i = 0; i<hash_num; ++i)\n\t\t{\n\t\t\tcm_sketch[i] = new int[w];\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int)*w);\n\t\t\trandom_device rd;\n\t\t\thash[i].SetSeed(uint32_t(rd() % MAX_PRIME32));\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid Insert(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change parameter type*/\n\n\t\t/*----optional according to your need----*/\n\t\tint tmin = 1 << 30, ans = tmin;\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str, len) % w;\n\t\t\tcm_sketch[i][idx]++;\n\t\t\tint val = cm_sketch[i][idx];\n\t\t\tans = std::min(val, ans);\n\t\t}\n\n\t\ttmin = ans;\n\t\tstring str_key = string(str, len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint tmin = 1 << 30, ans = tmin;\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str, len) % w;\n\t\t\tint val = cm_sketch[i][idx];\n\t\t\tans = std::min(val, ans);\n\t\t}\n\t\treturn ans;\n\t\t/*----optional according to your need----*/\n\t}\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\n\t\t}\n\t\treturn topkItem;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid reset()\n\t{\n\t\t/*MUST have this function,reset sketch to the initial state */\n\n\t\t/*----optional according to your need----*/\n\t\theap_element_num = 0;\n\t\tfor (int i; i<hash_num; ++i)\n\t\t{\n\t\t\tmemset(cm_sketch[i], 0, sizeof(int) * w);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\t~CM_Heap()\n\t{\n\t\t/*MUST have this function */\n\n\t\t/*----optional according to your need----*/\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tdelete [] cm_sketch[i];\n\t\t}\n\t\tdelete [] hash;\n\t\tdelete [] cm_sketch;\n\t\treturn;\n\t\t/*----optional according to your need----*/\n\t}\n\n\t/*----optional You can add your function----*/\n};\nREGISTER(CM_Heap);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/CM_CU.h", "name": "CM_CU", "params": [{"field": "hash_num", "help": "", "type": "int"}, {"field": "memory_in_bytes", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef CM_CU_SKETCH_H\n#define CM_CU_SKETCH_H\n#include \"SketchBase.h\"\n#include \"factor.h\"\n#include \"../hash/hashfunction.h\"\n#include<string>\n#include<iostream>\n#include<memory.h>\n/*\nint is type of frequencyQuery Return\nhashfunction type:BOBHash\\\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\nclass CM_CU : public SketchBase {\nprivate:\n\tint hash_num;//parameter\n\tint memory_in_bytes;//parameter\n\tint w = 0;\n\tBOBHash * hash;\n\tint **data;\npublic:\n\tusing SketchBase::sketch_name;\n\tCM_CU()\n\t{\n\t\tsketch_name = \"CM_CU\";\n\t}\n\tvoid parameterSet(const std::string& parameterName, double parameterValue)\n\t{\n\n\t\tif (parameterName == \"hash_num\")\n\t\t{\n\n\t\t\thash_num = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\tif (parameterName == \"memory_in_bytes\")\n\t\t{\n\t\t\tmemory_in_bytes = parameterValue;\n\t\t\treturn;\n\t\t}\n\t}\n\tvoid init()\n\t{\n\t\tw = memory_in_bytes / 4 / hash_num;\n\t\tdata = new int*[hash_num];\n\t\thash = new BOBHash[hash_num];\n\t\tfor (int i = 0; i<hash_num; ++i)\n\t\t{\n\t\t\tdata[i] = new int[w];\n\t\t\tmemset(data[i], 0, sizeof(int)*w);\n\t\t\thash[i].SetSeed(i + 750);\n\t\t}\n\n\t}\n\tvoid Insert(const char * str, const int & len)\n\t{\n\n\t\tint tmin = 1 << 30;\n\t\tint idxs[100];\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tidxs[i] = hash[i].Run(str, len) % w;\n\t\t\ttmin = min(data[i][idxs[i]], tmin);\n\t\t}\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tif (data[i][idxs[i]] == tmin)\n\t\t\t\t++data[i][idxs[i]];\n\t\t}\n\t}\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\tint res = data[0][hash[0].Run(str, len) % w];\n\t\tfor (int i = 1; i < hash_num; ++i) {\n\t\t\tint t = data[i][hash[i].Run(str, len) % w];\n\t\t\tres = res < t ? res : t;\n\t\t}\n\t\treturn res;\n\t}\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n\t{\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\t\treturn topkItem;\n\t}\n\tvoid reset()\n\t{\n\t\tfor (int i; i<hash_num; ++i)\n\t\t{\n\t\t\tmemset(data[i], 0, sizeof(int)*w);\n\t\t}\n\t}\n\t~CM_CU()\n\t{\n\t\tfor (int i; i<hash_num; ++i)\n\t\t{\n\t\t\tdelete[] data[i];\n\t\t}\n\t\tdelete[] data;\n\t}\n};\nREGISTER(CM_CU);\n#endif"}, {"path": "skbm/new_sketch/sketch/ColdFilter_CM_Heap.h", "name": "ColdFilter_CM_Heap", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef ColdFilter_CM_Heap_H //must change this MACRO\n#define ColdFilter_CM_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/CM_Heap.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_CM_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tint capacity;\n\tStreamClassifier<T2, 65> *sc;\n\tCM_Heap sketch;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_CM_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_CM_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tsketch.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tsketch.parameterSet(\"hash_num\",3);\n\t\tsketch.parameterSet(\"capacity\", capacity);\n\t\tsketch.init();\n\t\tsc->init_spa(&sketch);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += sketch.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n return sketch.topkQuery(k);\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_CM_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_CM_Heap);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/ColdFilter_CM_CU.h", "name": "ColdFilter_CM_CU", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef ColdFilter_CM_CU_H //must change this MACRO\n#define ColdFilter_CM_CU_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/CM_CU.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_CM_CU: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tStreamClassifier<T2, 65> *sc;\n\tCM_CU sketch;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_CM_CU()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_CM_CU\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tsketch.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tsketch.parameterSet(\"hash_num\",3);\n\t\tsketch.init();\n\t\tsc->init_spa(&sketch);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += sketch.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n return topkItem;\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_CM_CU()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_CM_CU);\n#endif//DO NOT change this file"}, {"path": "skbm/new_sketch/sketch/ColdFilter_CM_CU_Heap.h", "name": "ColdFilter_CM_CU_Heap", "params": [{"field": "total_memory_in_bytes", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}, {"field": "filter_memory_percent", "help": "", "type": "int"}, {"field": "bucket_num", "help": "", "type": "int"}, {"field": "counter_num", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "code": "#ifndef ColdFilter_CM_CU_Heap_H //must change this MACRO\n#define ColdFilter_CM_CU_Heap_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\"//If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n#include \"../sketch/CM_CU_Heap.h\"\n#pragma pack (16)\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n\n\n\n\n#ifndef _SC_H\n#define _SC_H\n\n//#include \"params.h\"\n#include <cstring>\n#include <algorithm>\n#include <emmintrin.h>\n#include <immintrin.h>\n#include <stdexcept>\n#include <x86intrin.h>\nusing namespace std;\n\n#define MAX_HASH_NUM_SC 4\n\n\ntemplate<int threshold = 240, int l1_ratio = 65>\nclass StreamClassifier\n{\n\t// static constexpr int bucket_num = 1000;\n\t// static constexpr int counter_num = 16;\n\tint memory_in_bytes,bucket_num,counter_num;\n\t\n\tint buffer_size;\n\tint remained;\n\tint d1;\n\tint m1_in_bytes;\n\tint d2;\n\tint m2_in_bytes;\n\n\n\t//uint32_t **ID;\n\t//int **counter;\n\t//int *cur_pos;\n\tuint32_t ID[1005][16] __attribute__((aligned(16)));\n\tint counter[1005][16];\n\tint cur_pos[1005];\n\n\tint w1;\n\tint w_word;\n\tint w2;\n\tuint64_t * L1;\n\tuint16_t * L2;\n\t//uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit\n\t//uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit\n\n\tSketchBase * spa;\n\n\tBOBHash * bobhash1;\n\tBOBHash ** bobhash2;\n\n\tint cur_kick;\n\n\tvoid clear_data()\n\t{\n\t\tcur_kick = 0;\n\t\tmemset(ID, 0, sizeof(ID));\n\t\tmemset(counter, 0, sizeof(counter));\n\t\tmemset(cur_pos, 0, sizeof(cur_pos));\n\t\tmemset(L1, 0, sizeof(L1));\n\t\tmemset(L2, 0, sizeof(L2));\n\t}\npublic:\n\tStreamClassifier(int memory_in_bytes__,int bucket_num__,int counter_num__):memory_in_bytes(memory_in_bytes__),bucket_num(bucket_num__),counter_num(counter_num__)\n\t{\n\t\tbuffer_size = bucket_num * counter_num * 8;\n\t\tremained = memory_in_bytes - buffer_size;\n\n\t\td1 = 3;\n\t\tm1_in_bytes = int(remained * l1_ratio / 100.0);\n\t\td2 = 3;\n\t\tm2_in_bytes = int(remained * (100 - l1_ratio) / 100.0);\n\t\t//ID = new uint32_t*[bucket_num];\n\t\t//counter = new int*[bucket_num];\n\t\t//cur_pos = new int[bucket_num];\n\t\t//for (int i = 0; i < bucket_num; ++i)\n\t\t//{\n\n\t\t//\tchar* buf = new char[counter_num*4 + 16];\n\t\t//\tID[i] = (uint32_t *)(((uint64_t)buf + 15) & ~15);\n\t\t//\t//ID[i] = new uint32_t[counter_num];\n\t\t//\tcounter[i] = new int[counter_num];\n\t\t//}\n\t\tw1 = m1_in_bytes * 8 / 4;\n\t\tw_word = m1_in_bytes * 8 / 4 / 16;\n\t\tw2 = m2_in_bytes * 8 / 16;\n\t\t\n\t\tL1 = new uint64_t[m1_in_bytes * 8 / 4 / 16];\n\t\tL2 = new uint16_t[m2_in_bytes * 8 / 16];\n\n\t\tbobhash2 = new BOBHash*[d2];\n\t\tbobhash1 = new BOBHash;\n\t\tbobhash1->SetSeed(500);\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tbobhash2[i] = new BOBHash;\n\t\t\tbobhash2[i]->SetSeed(1000 + i);\n\t\t}\n\t\tclear_data();\n\t\tspa = NULL;\n\t}\n\n\tvoid print_basic_info()\n\t{\n\t\tprintf(\"Stream Classifer\\n\");\n\t\tprintf(\"\\tSIMD buffer: %d counters, %.4lf MB occupies\\n\", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024);\n\t\tprintf(\"\\tL1: %d counters, %.4lf MB occupies\\n\", w1, w1 * 0.5 / 1024 / 1024);\n\t\tprintf(\"\\tL2: %d counters, %.4lf MB occupies\\n\", w2, w2 * 2.0 / 1024 / 1024);\n\t}\n\n\t~StreamClassifier()\n\t{\n\t\tdelete bobhash1;\n\t\tfor (int i = 0; i < d2; i++)\n\t\t\tdelete bobhash2[i];\n\t}\n\n\t//periodical refreshing for continuous top-k;\n\tvoid init_array_period()\n\t{\n\t\tfor (int i = 0; i < w_word; i++) {\n\t\t\tuint64_t temp = L1[i];\n\n\t\t\ttemp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0);\n\t\t\ttemp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F);\n\t\t\ttemp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF);\n\t\t\ttemp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF);\n\n\t\t\ttemp = (temp & (0xF0000)) == 0xF0000 ? temp : (temp & 0xFFFFFFFFFFF0FFFF);\n\t\t\ttemp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF);\n\t\t\ttemp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF);\n\t\t\ttemp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF);\n\n\t\t\ttemp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF);\n\n\t\t\ttemp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF);\n\t\t\ttemp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF);\n\n\t\t\tL1[i] = temp;\n\t\t}\n\n\t\tfor (int i = 0; i < w2; i++) {\n\t\t\tshort int temp = L2[i];\n\t\t\tL2[i] = (temp == threshold) ? temp : 0;\n\t\t}\n\t}\n\n\tvoid init_array_all()\n\t{\n\t\tmemset(L1, 0, sizeof(uint64_t) * w_word);\n\t\tmemset(L2, 0, sizeof(short int) * w2);\n\t}\n\n\tvoid init_spa(SketchBase * _spa)\n\t{\n\t\tspa = _spa;\n\t}\n\n\tvoid insert(const char * str)\n\t{\n\t\tuint32_t key;\n\t\tmemcpy(&key, str, 4);\n\t\tint bucket_id = key % bucket_num;\n\t\t// int bucket_id = key & 0x2FF;\n\n\t\t// the code below assume counter per buckets is 16\n\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\n\t\tconst __m128i item = _mm_set1_epi32((int)key);\n\t\tint matched;\n\n\t\tif (counter_num == 16) {\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n\t\t\t__m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]);\n\t\t\t__m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]);\n\t\t\t__m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\n\t\t\ta_comp = _mm_packs_epi32(a_comp, b_comp);\n\t\t\tc_comp = _mm_packs_epi32(c_comp, d_comp);\n\t\t\ta_comp = _mm_packs_epi32(a_comp, c_comp);\n\n\t\t\tmatched = _mm_movemask_epi8(a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse if (counter_num == 4) {\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n\t\t\tcout << bucket_id << endl;\n#endif // DEBUG\n\t\t\t__m128i *keys_p = (__m128i *)ID[bucket_id];\n#ifdef DEBUG\n#endif // DEBUG\n\t\t\t__m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t\tmatched = _mm_movemask_ps(*(__m128 *)&a_comp);\n#ifdef DEBUG\n\t\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\t}\n\t\telse {\n\t\t\tthrow std::logic_error(\"Not implemented.\");\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tif (matched != 0) {\n\t\t\t//return 32 if input is zero;\n\t\t\tint matched_index = _tzcnt_u32((uint32_t)matched);\n\n\t\t\t++counter[bucket_id][matched_index];\n\t\t\treturn;\n\t\t}\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint cur_pos_now = cur_pos[bucket_id];\n\t\tif (cur_pos_now != counter_num) {\n\t\t\t// printf(\"%d\\n\", cur_pos_now);\n\t\t\tID[bucket_id][cur_pos_now] = key;\n\t\t\t++counter[bucket_id][cur_pos_now];\n\t\t\t++cur_pos[bucket_id];\n\t\t\treturn;\n\t\t}\n\n\t\t/****************randomly choose one counter to kick!******************/\n\t\tchar tmp2[4];\n\t\tmemcpy(tmp2, &ID[bucket_id][cur_kick], 4);\n\t\tinsert_SC(tmp2, counter[bucket_id][cur_kick]);\n\t\tID[bucket_id][cur_kick] = key;\n\t\tcounter[bucket_id][cur_kick] = 1;\n\n\t\tcur_kick = (cur_kick + 1) % counter_num;\n\t}\n\n\tvoid insert_SC(const char * kick_ID, int kick_f)\n\t{\n#ifdef DEBUG\n\t\tcout << __LINE__ << endl;\n#endif // DEBUG\n\t\tint v1 = 1 << 30;\n\n\t\tint value[MAX_HASH_NUM_SC];\n\t\tint index[MAX_HASH_NUM_SC];\n\t\tint offset[MAX_HASH_NUM_SC];\n\n\t\tuint64_t hash_value = bobhash1->Run(kick_ID, 4);\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\toffset[i] = (hash_value & 0xF);\n\t\t\tvalue[i] = (temp >> (offset[i] << 2)) & 0xF;\n\t\t\tv1 = std::min(v1, value[i]);\n\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tint temp2 = v1 + kick_f;\n\t\tif (temp2 <= 15) { // maybe optimized use SIMD?\n\t\t\tfor (int i = 0; i < d1; i++) {\n\t\t\t\tint temp3 = ((temp >> (offset[i] << 2)) & 0xF);\n\t\t\t\tif (temp3 < temp2) {\n\t\t\t\t\ttemp += ((uint64_t)(temp2 - temp3) << (offset[i] << 2));\n\t\t\t\t}\n\t\t\t}\n\t\t\tL1[word_index] = temp;\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\ttemp |= ((uint64_t)0xF << (offset[i] << 2));\n\t\t}\n\t\tL1[word_index] = temp;\n\n\t\tint delta1 = 15 - v1;\n\t\tkick_f -= delta1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tindex[i] = (bobhash2[i]->Run(kick_ID, 4)) % w2;\n\t\t\tvalue[i] = L2[index[i]];\n\t\t\tv2 = std::min(value[i], v2);\n\t\t}\n\n\t\ttemp2 = v2 + kick_f;\n\t\tif (temp2 <= threshold) {\n\t\t\tfor (int i = 0; i < d2; i++) {\n\t\t\t\tL2[index[i]] = (L2[index[i]] > temp2) ? L2[index[i]] : temp2;\n\t\t\t}\n\t\t\treturn;\n\t\t}\n\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tL2[index[i]] = threshold;\n\t\t}\n\n\t\tint delta2 = threshold - v2;\n\t\tkick_f -= delta2;\n\t\tfor (int i = 0; i < kick_f; ++i)\n\t\t{\n\t\t\tspa->Insert(kick_ID, 4);\n\t\t}\n\t}\n\n\tvoid refresh()\n\t{\n\t\tfor (int i = 0; i < bucket_num; i++) {\n\t\t\tfor (int j = 0; j < counter_num; j++) {\n\t\t\t\tinsert_SC(ID[i][j], counter[i][j]);\n\t\t\t\tID[i][j] = counter[i][j] = 0;\n\t\t\t}\n\t\t\tcur_pos[i] = 0;\n\t\t}\n\t\treturn;\n\t}\n\n\tint query(const char * key)\n\t{\n\t\tint v1 = 1 << 30;\n\n\t\t// constexpr int max_d = d1 > d2 ? d1 : d2;\n\t\t// int value[max_d];\n\t\t// int index[max_d];\n\t\t// int offset[max_d];\n\n\t\tuint32_t hash_value = (bobhash1->Run(key, 4));\n\t\tint word_index = hash_value % w_word;\n\t\thash_value >>= 16;\n\n\t\tuint64_t temp = L1[word_index];\n\t\tfor (int i = 0; i < d1; i++) {\n\t\t\tint of, val;\n\t\t\tof = (hash_value & 0xF);\n\t\t\tval = (temp >> (of << 2)) & 0xF;\n\t\t\tv1 = std::min(val, v1);\n\t\t\thash_value >>= 4;\n\t\t}\n\n\t\tif (v1 != 15)\n\t\t\treturn v1;\n\n\t\tint v2 = 1 << 30;\n\t\tfor (int i = 0; i < d2; i++) {\n\t\t\tint index = (bobhash2[i]->Run(key, 4)) % w2;\n\t\t\tint value = L2[index];\n\t\t\tv2 = std::min(value, v2);\n\t\t}\n\n\t\treturn v1 + v2;\n\t}\n};\n\n#endif//_SC_H\n/*----optional according to your need----*/\n\n#define T1 15\n#define T2 241\n#define THRESHOLD (T1 + T2)\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\n define: e.g. BOBHash myhash\n setseed: e.g. myhash.SetSeed(1001)\n calculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE:\ndefine: cuckoo::CuckooHashing<key_len> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase(){}\nvirtual void Insert(const char * str, const int & len) = 0;\nvirtual int frequencyQuery(const char * str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > topkQuery(const int & k) = 0;\nvirtual void parameterSet(const std::string& parameterName, double parameterValue)=0;\nvirtual void init() = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\n\nclass ColdFilter_CM_CU_Heap: public SketchBase {\nprivate:\n\t/*----optional according to your need----*/\n\tint total_memory_in_bytes;//parameter\n\tint filter_memory_percent;//parameter\n\tint bucket_num;//parameter\n\tint counter_num;//parameter\n\tint capacity;\n\tStreamClassifier<T2, 65> *sc;\n\tCM_CU_Heap sketch;\n /*----optional according to your need----*/\npublic:\n using SketchBase::sketch_name;//DO NOT change this declaration\n ColdFilter_CM_CU_Heap()\n {\n /*constructed function MUST BT non-parameter!!!*/\n sketch_name = \"ColdFilter_CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n }\n void parameterSet(const std::string& parameterName, double parameterValue)\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n if (parameterName==\"total_memory_in_bytes\")\n {\n\n\t\t\ttotal_memory_in_bytes = parameterValue;\n return;\n }\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n if (parameterName==\"filter_memory_percent\")\n {\n\t\t\tfilter_memory_percent= parameterValue;\n return;\n }\n if (parameterName==\"bucket_num\")\n {\n\t\t\t bucket_num = parameterValue;\n return;\n }\n\t\t if (parameterName == \"counter_num\")\n\t\t {\n\t\t\t counter_num = parameterValue;\n\t\t\t return;\n\t\t }\n /*----optional according to your need----*/\n }\n void init()\n {\n /*MUST have this function even empty function body*/\n\n /*----optional according to your need----*/\n\t\tsc = new StreamClassifier<T2, 65>(int64_t(total_memory_in_bytes) * filter_memory_percent / 100, bucket_num, counter_num);\n\t\tsketch.parameterSet(\"memory_in_bytes\", int((total_memory_in_bytes) * (100 - filter_memory_percent) / 100));\n\t\tsketch.parameterSet(\"hash_num\",3);\n\t\tsketch.parameterSet(\"capacity\", capacity);\n\t\tsketch.init();\n\t\tsc->init_spa(&sketch);\n /*----optional according to your need----*/\n }\n void Insert(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change parameter type*/\n\n /*----optional according to your need----*/\n\t\tsc->insert(str);\n /*----optional according to your need----*/\n }\n int frequencyQuery(const char *str, const int & len)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n\t\tint ret = sc->query(str);\n\t\tif (ret == THRESHOLD)\n\t\t\tret += sketch.frequencyQuery(str,len);\n\n\t\treturn ret;\n /*----optional according to your need----*/\n }\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n {\n /*MUST have this function DO NOT change function head and parameter type */\n\n /*----optional according to your need----*/\n return sketch.topkQuery(k);\n /*----optional according to your need----*/\n }\n void reset()\n {\n /*MUST have this function,reset sketch to the initial state */\n\n /*----optional according to your need----*/\n \n /*----optional according to your need----*/\n }\n ~ColdFilter_CM_CU_Heap()\n {\n /*MUST have this function */\n\n /*----optional according to your need----*/\n\t\tdelete sc;\n /*----optional according to your need----*/\n }\n\n /*----optional You can add your function----*/\n};\nREGISTER(ColdFilter_CM_CU_Heap);\n#endif//DO NOT change this file"}, {"code": "#ifndef CM_CU_HEAP_H //must change this MACRO\n#define CM_CU_HEAP_H //must change this MACRO\n#include \"SketchBase.h\" //DO NOT change this include\n#include \"factor.h\"//DO NOT change this include\n#include \"../hash/hashfunction.h\" //If you want to use DSAB-builtin hashfunction must include this\n#include \"../hash/cuckoo_hashing.h\" //If you want to use DSAB-builtin hashtable must include this\n\n/*----optional according to your need----*/\n#include<string>\n#include<iostream>\n#include<memory.h>\n#include <unordered_map>\n#include <algorithm>\n#include <sstream>\nusing std::min;\nusing std::swap;\n/*----optional according to your need----*/\n\n/*----builtin hashfunction----*/\n/*\nDSAB-builtin hashfunction type:BOBHash\\\nHOW TO USE:\ndefine: e.g. BOBHash myhash\nsetseed: e.g. myhash.SetSeed(1001)\ncalculate hash: e.g. myhash.Run(const char *str, const int & len)\n*/\n/*----builtin hashfunction----*/\n\n/*----builtin hashTable----*/\n/*\nDSAB-builtin hashTable type:cuckoo_hashtable\\\nHOW TO USE: \ndefine: cuckoo::CuckooHashing<key_len, capacity> ht;\n!!!MUST init: ht.init(capacity)\nbool insert(char * key, uint32_t val, int from_k = -1, int remained = 5)\nbool query(char * key, uint32_t & val)\nbool find(char * key)\nbool erase(char * key)\n*/\n/*----builtin hashTable----*/\n\n\n/*----SketchBase virtual function must be finished----*/\n/*\nvirtual ~SketchBase() = 0;\nvirtual void parameterSet(const string& parameterName, double parameterValue)=0;\nvirtual init() = 0;\nvirtual void Insert(const char *str, const int & len) = 0;\nvirtual std::vector<std::pair <std::string, int> > frequencyQuery(const char *str, const int & len) = 0;\nvirtual vector<string> topkQuery(const int & k) = 0;\nvirtual void reset() = 0;//reset sketch to the initial state\n*/\n/*----SketchBase virtual function must be finished----*/\n\nbool CMCUHcmp(const pair<string, uint32_t> a, const pair<string, uint32_t> b) {\n\treturn a.second > b.second;\n}\nclass CM_CU_Heap : public SketchBase {\nprivate:\n\t\n\t/*----optional according to your need----*/\n\tint hash_num;//parameter\n\tint memory_in_bytes;//parameter\n\tint capacity;//parameter\n\n\ttypedef pair <string, int> KV;\n\ttypedef pair <int, string> VK;\n\tVK * heap;\n\tint heap_element_num;\n\tint w;\n\tint ** cm_cu;\n\tBOBHash * hash;\n\t//cuckoo::CuckooHashing<4> ht;\n\n\tunordered_map<string, uint32_t> ht;\n\t/*----optional according to your need----*/\n\t// heap\n\tvoid heap_adjust_down(int i) {\n\t\twhile (i < heap_element_num / 2) {\n\t\t\tint l_child = 2 * i + 1;\n\t\t\tint r_child = 2 * i + 2;\n\t\t\tint larger_one = i;\n\t\t\tif (l_child < heap_element_num && heap[l_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = l_child;\n\t\t\t}\n\t\t\tif (r_child < heap_element_num && heap[r_child] < heap[larger_one]) {\n\t\t\t\tlarger_one = r_child;\n\t\t\t}\n\t\t\tif (larger_one != i) {\n\t\t\t\tswap(heap[i], heap[larger_one]);\n\t\t\t\tswap(ht[heap[i].second], ht[heap[larger_one].second]);\n\t\t\t\theap_adjust_down(larger_one);\n\t\t\t}\n\t\t\telse {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t}\n\tvoid heap_adjust_up(int i) {\n\t\twhile (i > 1) {\n\t\t\tint parent = (i - 1) / 2;\n\t\t\tif (heap[parent] <= heap[i]) {\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\tswap(heap[i], heap[parent]);\n\t\t\tswap(ht[heap[i].second], ht[heap[parent].second]);\n\t\t\ti = parent;\n\t\t}\n\t}\n\npublic:\n\tusing SketchBase::sketch_name;//DO NOT change this declaration\n\tCM_CU_Heap()\n\t{\n\t\t/*constructed function MUST BT non-parameter!!!*/\n\t\tsketch_name = \"CM_CU_Heap\";//please keep sketch_name the same as class name and .h file name\n\t}\n\tvoid parameterSet(const std::string& parameterName, double parameterValue)\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\tif (parameterName == \"hash_num\")\n\t\t{\n\n\t\t\thash_num = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\tif (parameterName == \"capacity\")\n\t\t{\n\t\t\tcapacity = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\tif (parameterName == \"memory_in_bytes\")\n\t\t{\n\t\t\tmemory_in_bytes = parameterValue;\n\t\t\treturn;\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid init()\n\t{\n\t\t/*MUST have this function even empty function body*/\n\n\t\t/*----optional according to your need----*/\n\t\n\t\tint sketchMem = memory_in_bytes - capacity * 8;\n\t\tw = sketchMem / 4 / hash_num;\n\t\theap = new VK[capacity];\n\t\tmemset(heap, 0, capacity * sizeof(VK));\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\theap[i].first = 0;\n\t\t}\n\t\theap_element_num = 0;\n\t\tcm_cu = new int*[hash_num];\n\t\thash = new BOBHash[hash_num];\n\t\tfor (int i = 0; i<hash_num; ++i)\n\t\t{\n\t\t\tcm_cu[i] = new int[w];\n\t\t\tmemset(cm_cu[i], 0, sizeof(int)*w);\n\t\t\trandom_device rd;\n\t\t\thash[i].SetSeed(uint32_t(rd() % MAX_PRIME32));\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid Insert(const char *str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change parameter type*/\n\n\t\t/*----optional according to your need----*/\n\n\t\tint tmin = 1 << 30;\n\t\tint idxs[100];\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tidxs[i] = hash[i].Run(str, len) % w;\n\t\t\ttmin = min(cm_cu[i][idxs[i]], tmin);\n\t\t}\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tif (cm_cu[i][idxs[i]] == tmin)\n\t\t\t\t++cm_cu[i][idxs[i]];\n\t\t}\n\t\ttmin += 1;\n\t\tstring str_key = string(str, len);\n\t\tif (ht.find(str_key) != ht.end()) {\n\t\t\theap[ht[str_key]].first++;\n\t\t\theap_adjust_down(ht[str_key]);\n\t\t}\n\t\telse if (heap_element_num < capacity) {\n\t\t\theap[heap_element_num].second = str_key;\n\t\t\theap[heap_element_num].first = tmin;\n\t\t\tht[str_key] = heap_element_num++;\n\t\t\theap_adjust_up(heap_element_num - 1);\n\t\t}\n\t\telse if (tmin > heap[0].first) {\n\t\t\tVK & kv = heap[0];\n\t\t\tht.erase(kv.second);\n\t\t\tkv.second = str_key;\n\t\t\tkv.first = tmin;\n\t\t\tht[str_key] = 0;\n\t\t\theap_adjust_down(0);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\tint frequencyQuery(const char * str, const int & len)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tint tmin = 1 << 30, ans = tmin;\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tint idx = hash[i].Run(str, len) % w;\n\t\t\tint val = cm_cu[i][idx];\n\t\t\tans = std::min(val, ans);\n\t\t}\n\t\treturn ans;\n\t\t/*----optional according to your need----*/\n\t}\n\tstd::vector<std::pair <std::string, int> > topkQuery(const int & k)\n\t{\n\t\t/*MUST have this function DO NOT change function head and parameter type */\n\n\t\t/*----optional according to your need----*/\n\t\tstd::vector<std::pair <std::string, int> > topkItem;\n\n\t\tVK * a = new VK[capacity];\n\t\tfor (int i = 0; i < capacity; ++i) {\n\t\t\ta[i] = heap[i];\n\t\t}\n\t\tsort(a, a + capacity);\n\t\tint i;\n\t\tfor (i = 0; i < k && i < capacity; ++i) {\n\t\t\ttopkItem.push_back(make_pair(a[capacity - 1 - i].second, a[capacity - 1 - i].first));\n\n\t\t}\n\t\treturn topkItem;\n\t\t/*----optional according to your need----*/\n\t}\n\tvoid reset()\n\t{\n\t\t/*MUST have this function,reset sketch to the initial state */\n\n\t\t/*----optional according to your need----*/\n\t\theap_element_num = 0;\n\t\tfor (int i; i<hash_num; ++i)\n\t\t{\n\t\t\tmemset(cm_cu[i], 0, sizeof(int) * w);\n\t\t}\n\t\t/*----optional according to your need----*/\n\t}\n\t~CM_CU_Heap()\n\t{\n\t\t/*MUST have this function */\n\n\t\t/*----optional according to your need----*/\n\t\tfor (int i = 0; i < hash_num; ++i) {\n\t\t\tdelete [] cm_cu[i];\n\t\t}\n\t\tdelete [] hash;\n\t\tdelete [] cm_cu;\n\t\treturn;\n\t\t/*----optional according to your need----*/\n\t}\n\n\t/*----optional You can add your function----*/\n};\nREGISTER(CM_CU_Heap);\n#endif//DO NOT change this file", "name": "CM_CU_Heap", "params": [{"field": "hash_num", "help": "", "type": "int"}, {"field": "capacity", "help": "", "type": "int"}, {"field": "memory_in_bytes", "help": "", "type": "int"}], "tasks": [{"name": "freq", "params": []}, {"name": "topk", "params": [{"field": "k", "help": "xxx", "type": "int"}]}, {"name": "speed", "params": []}], "path": "skbm/new_sketch/sketch/CM_CU_Heap.h"}]