1515"""Benchmark JetStream online serving.
1616
1717On the server side, run one of the following commands:
18- * For real server, you need to pass correct server config (include the model config that
19- being passed into your engine impl) to the command below. Refer to config_lib.py and
20- implementations/mock/config.py for config impl detail.
18+ * For a real server, you need to pass the correct server config
19+ (including the model config that is being passed into your engine
20+ impl) to the command below. Refer to config_lib.py and
21+ implementations/mock/config.py for config impl detail.
2122
2223 (run with real server)
2324 python -m jetstream.core.implementations.<your_impl>.server \
2728 python -m jetstream.core.implementations.mock.server
2829
2930On the client side, run:
30- * For real server and shareGPT dataset, you need to pass the tokenizer, server config, and
31- dataset flags to the command below, and make some changes to the tokenizer logic in the
32- benchmark script (get_tokenizer and sample_requests func) to use your tokenizer correctly.
33- * Add `--save-result` flag to save the benchmark result to a json file in current folder.
34- * Add `--threads` flag to set the maximum number of threads used for request dispatching.
31+ * For real server and shareGPT dataset, you need to pass the tokenizer,
32+ server config, and dataset flags to the command below, and make some
33+ changes to the tokenizer logic in the benchmark script (get_tokenizer
34+ and sample_requests func) to use your tokenizer correctly.
35+ * Add `--save-result` flag to save the benchmark result to a json file in
36+ current folder.
3537
3638 (run with real model and engines)
3739 python -m benchmarks.benchmark_serving \
7476
7577@dataclass
7678class BenchmarkMetrics :
79+ """Data class to store benchmark metrics."""
80+
7781 completed : int
7882 total_input : int
7983 total_output : int
@@ -136,7 +140,7 @@ def load_sharegpt_dataset(
136140 conversation_starter : str ,
137141) -> List [tuple [str ]]:
138142 # Load the dataset.
139- with open (dataset_path ) as f :
143+ with open (dataset_path , "r" , encoding = "utf-8" ) as f :
140144 dataset = json .load (f )
141145 # Filter out the conversations with less than 2 turns.
142146 dataset = [data for data in dataset if len (data ["conversations" ]) >= 2 ]
@@ -159,7 +163,7 @@ def load_sharegpt_dataset(
159163
160164def load_openorca_dataset (dataset_path : str ) -> List [tuple [str ]]:
161165 # Load the dataset.
162- with open (dataset_path ) as f :
166+ with open (dataset_path , "r" , encoding = "utf-8" ) as f :
163167 dataset = json .load (f )
164168
165169 # Tokenize the prompts and completions.
@@ -211,7 +215,7 @@ def filter_dataset(
211215 filtered_dataset : List [InputRequest ] = []
212216 for (
213217 prompt ,
214- prompt_token_ids ,
218+ _ ,
215219 output ,
216220 prompt_len ,
217221 output_len ,
@@ -255,7 +259,7 @@ def sample_requests(
255259 print (
256260 f"Number of requests { num_requests } is larger than size of dataset"
257261 f" { n } .\n " ,
258- f "Repeating data to meet number of requests.\n " ,
262+ "Repeating data to meet number of requests.\n " ,
259263 )
260264 sampled_indices = sampled_indices * int (
261265 np .ceil (num_requests / len (sampled_indices ))
@@ -361,7 +365,6 @@ async def send_request(
361365 pbar : tqdm ,
362366 session_cache : str ,
363367 priority : int ,
364- threads : int ,
365368) -> RequestFuncOutput :
366369 """Send the request to JetStream server."""
367370 request = jetstream_pb2 .DecodeRequest (
@@ -394,7 +397,6 @@ async def benchmark(
394397 disable_tqdm : bool ,
395398 session_cache : str ,
396399 priority : int ,
397- threads : int ,
398400):
399401 """Benchmark the online serving performance."""
400402 pbar = None if disable_tqdm else tqdm (total = len (input_requests ))
@@ -412,7 +414,6 @@ async def benchmark(
412414 pbar = pbar ,
413415 session_cache = session_cache ,
414416 priority = priority ,
415- threads = threads ,
416417 )
417418 )
418419 )
@@ -519,8 +520,8 @@ def main(args: argparse.Namespace):
519520 )
520521
521522 # A given args.max_output_length value is the max generation step,
522- # when the args.max_output_length is default to None, the sample's golden output length
523- # will be used to decide the generation step
523+ # when args.max_output_length defaults to None, the sample's golden
524+ # output length will be used to decide the generation step.
524525 input_requests = sample_requests (
525526 dataset = dataset ,
526527 tokenizer = tokenizer ,
@@ -540,7 +541,6 @@ def main(args: argparse.Namespace):
540541 disable_tqdm = args .disable_tqdm ,
541542 session_cache = args .session_cache ,
542543 priority = args .priority ,
543- threads = args .threads ,
544544 )
545545 )
546546 print ("Warm up done" )
@@ -554,7 +554,6 @@ def main(args: argparse.Namespace):
554554 disable_tqdm = args .disable_tqdm ,
555555 session_cache = args .session_cache ,
556556 priority = args .priority ,
557- threads = args .threads ,
558557 )
559558 )
560559
@@ -582,12 +581,12 @@ def main(args: argparse.Namespace):
582581 file_name = (
583582 f"JetStream-{ args .request_rate } qps-{ base_model_id } -{ current_dt } .json"
584583 )
585- with open (file_name , "w" ) as outfile :
584+ with open (file_name , "w" , encoding = "utf-8" ) as outfile :
586585 json .dump (result_json , outfile )
587586
588587 if args .save_request_outputs :
589588 file_path = args .request_outputs_file_path
590- with open (file_path , "w" ) as output_file :
589+ with open (file_path , "w" , encoding = "utf-8" ) as output_file :
591590 json .dump (
592591 [output .to_dict () for output in request_outputs ],
593592 output_file ,
@@ -653,12 +652,6 @@ def main(args: argparse.Namespace):
653652 "the request arrival times."
654653 ),
655654 )
656- parser .add_argument (
657- "--threads" ,
658- type = int ,
659- default = 110 ,
660- help = "The maximum number of threads used for request dispatching." ,
661- )
662655 parser .add_argument (
663656 "--total-mock-requests" ,
664657 type = int ,
@@ -736,5 +729,5 @@ def main(args: argparse.Namespace):
736729 help = "What entity should be the one starting the conversations." ,
737730 )
738731
739- args = parser .parse_args ()
740- main (args )
732+ parsed_args = parser .parse_args ()
733+ main (parsed_args )
0 commit comments