Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .dockerignore

This file was deleted.

5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
__pycache__/
myvenv/
myenv/
venv/
*.exe
.env
*.pyc
27 changes: 0 additions & 27 deletions Dockerfile

This file was deleted.

Binary file added ExamplePE.exe
Binary file not shown.
29 changes: 14 additions & 15 deletions resources/FeatureExtraction1a.py → FeatureExtraction1a.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
File: FeatureExtraction1a.py
Author: Drew Wheeler
Last Edit: 2023-07-29_14:00-UTC

Modified by: Ubayeid U.
Modified: 2025-09-30

This file was written for Lab 1a of the AI-Assisted Malware Analysis Project,
funded by the NSF (grant #2025682).
Expand Down Expand Up @@ -71,28 +72,26 @@ def extract_sha256 (self, file_name: str = "") -> None:

def extract_header_s (self, binary: lief.PE.Binary = None) -> None:
    """Record the image size reported by the PE optional header.

    Stores ``binary.optional_header.sizeof_image`` in ``self.header_size``.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    # NOTE(review): extract_virtual_s reads this same field, so both features
    # always hold the same number -- confirm whether the lab expects a
    # different optional-header field (e.g. the size of the headers) here.
    self.header_size = binary.optional_header.sizeof_image

def extract_virtual_s (self, binary: lief.PE.Binary = None) -> None:
    """Record the virtual size of the image.

    Stores ``binary.optional_header.sizeof_image`` in ``self.virtual_size``.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.virtual_size = binary.optional_header.sizeof_image

def extract_machine (self, binary: lief.PE.Binary = None) -> None:
    """Record the name of the target machine found in the COFF header.

    ``str(binary.header.machine)`` is split on ``.`` and only the last
    component is kept, so a dotted rendering such as ``MACHINE_TYPES.I386``
    is stored as ``I386``. A rendering without dots is stored unchanged.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.target_machine = str(binary.header.machine).split('.')[-1]

def extract_sec_count (self, binary: lief.PE.Binary = None) -> None:
    """Record how many sections the executable has.

    The count is ``len(binary.sections)``, i.e. the number of section objects
    on the parsed binary, not a count field read from the header.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.section_count = len(binary.sections)

def extract_sec_data (self, binary: lief.PE.Binary = None) -> None:
    """Map each section name to its entropy, rounded to four decimals.

    Entries are written into the existing ``self.section_info`` dict, keyed
    by ``section.name`` (the short name, not the full name). A later section
    with the same name overwrites the earlier entry.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    for section in binary.sections:
        # Format to four decimals and parse back so the stored float carries
        # exactly the precision the rest of the lab compares against.
        self.section_info[section.name] = float(format(section.entropy, ".4f"))

def print_data (self) -> None:
print (self.sha256_hash)
Expand Down
124 changes: 114 additions & 10 deletions resources/FeatureExtraction1b.py → FeatureExtraction1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
File: FeatureExtraction1b.py
Author: Drew Wheeler
Last Edit: 2024-08-23

Modified by: Ubayeid U.
Modified: 2025-09-30

This file was written for Lab 1b of the AI-Assisted Malware Analysis Project,
funded by the NSF (grant #2025682).
Expand Down Expand Up @@ -46,6 +47,9 @@ def __init__ (self) -> None:
self.section_count:int = 0
self.section_info:dict = {}
self.encoded_sec_info: dict = {}
self.encoded_target_machine: int = 0
self.decoded_target_machine: str = ""
self.decoded_sec_info: dict = {}

def extract_features (self, file_name: str = "") -> None:
binary: lief.PE.Binary = lief.parse (file_name)
Expand All @@ -55,24 +59,49 @@ def extract_features (self, file_name: str = "") -> None:
self.extract_machine (binary)
self.extract_sec_count (binary)
self.extract_sec_data (binary)
# Encode the extracted features
self.encode_target_machine()
self.encode_section_info()
# Decode to verify encoding worked correctly
self.decode_target_machine()
self.decode_section_info()

def extract_sha256 (self, file_name: str = "") -> None:
    """Store the SHA-256 hex digest of the file at ``file_name``.

    The result is written to ``self.sha256_hash``.

    Args:
        file_name: Path of the file to hash; it is opened in binary mode.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = sha256()
    with open (file_name, "rb") as stream:
        # Hash in fixed-size chunks so a large executable is never held in
        # memory in one piece; the digest equals hashing the whole file.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    self.sha256_hash = digest.hexdigest()

# === Paste Lab 1a Methods Below This Line =========================================================
def extract_header_s (self, binary: lief.PE.Binary = None) -> None:
    """Record the image size reported by the PE optional header.

    Stores ``binary.optional_header.sizeof_image`` in ``self.header_size``.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    # NOTE(review): extract_virtual_s reads this same field, so both features
    # always hold the same number -- confirm whether the lab expects a
    # different optional-header field (e.g. the size of the headers) here.
    self.header_size = binary.optional_header.sizeof_image

def extract_virtual_s (self, binary: lief.PE.Binary = None) -> None:
    """Record the virtual size of the image.

    Stores ``binary.optional_header.sizeof_image`` in ``self.virtual_size``.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.virtual_size = binary.optional_header.sizeof_image

def extract_machine (self, binary: lief.PE.Binary = None) -> None:
    """Record the name of the target machine found in the COFF header.

    ``str(binary.header.machine)`` is split on ``.`` and only the last
    component is kept, so a dotted rendering such as ``MACHINE_TYPES.I386``
    is stored as ``I386``. A rendering without dots is stored unchanged.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.target_machine = str(binary.header.machine).split('.')[-1]

def extract_sec_count (self, binary: lief.PE.Binary = None) -> None:
    """Record how many sections the executable has.

    The count is ``len(binary.sections)``, i.e. the number of section objects
    on the parsed binary, not a count field read from the header.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    self.section_count = len(binary.sections)

def extract_sec_data (self, binary: lief.PE.Binary = None) -> None:
    """Map each section name to its entropy, rounded to four decimals.

    Entries are written into the existing ``self.section_info`` dict, keyed
    by ``section.name``. A later section with the same name overwrites the
    earlier entry.

    Args:
        binary: Parsed PE binary. The ``None`` default is only a placeholder;
            passing it raises ``AttributeError`` on the attribute access.
    """
    for section in binary.sections:
        # Format to four decimals and parse back so the stored float carries
        # exactly the precision the rest of the lab compares against.
        self.section_info[section.name] = float(format(section.entropy, ".4f"))

def print_data (self) -> None:
    """Print every extracted feature, one per line.

    Output order: SHA-256 hash, header size, virtual size, target machine,
    section count, section info dict.
    """
    features = (
        self.sha256_hash,
        self.header_size,
        self.virtual_size,
        self.target_machine,
        self.section_count,
        self.section_info,
    )
    for value in features:
        print (value)


# === Paste Lab 1a Methods Below This Line =========================================================


# === Lab 1b Methods Start Here ====================================================================
# Encoding Schemes:
#
Expand Down Expand Up @@ -109,19 +138,94 @@ def print_data (self) -> None:

def encode_target_machine (self) -> None:
    """One-hot encode ``self.target_machine``.

    Each known machine name owns a single bit; the result is stored in
    ``self.encoded_target_machine``. Any name missing from the table is
    encoded as the UNKNOWN bit (0x01).
    """
    machine_encoding = {
        "UNKNOWN": 0x01,
        "I386": 0x02,
        "AMD64": 0x04,
        "ARM64": 0x08,
    }
    self.encoded_target_machine = machine_encoding.get(
        self.target_machine, machine_encoding["UNKNOWN"]
    )

def encode_section_info (self) -> None:
    """One-hot encode every section name held in ``self.section_info``.

    ``self.encoded_sec_info`` is rebuilt from scratch on each call, so it
    always mirrors the current ``self.section_info``. Each key is the section
    name and each value is ``{"encoded": <bit>, "entropy": <entropy>}``.
    Section names missing from the table are encoded as 0 (no bit set).
    """
    section_encoding = {
        ".text": 0x000001,
        ".rdata": 0x000002,
        ".data": 0x000004,
        ".ndata": 0x000008,
        ".rsrc": 0x000010,
        ".itext": 0x000020,
        ".bss": 0x000040,
        ".idata": 0x000080,
        ".didata": 0x000100,
        ".edata": 0x000200,
        ".tls": 0x000400,
        ".buildid": 0x000800,
        ".reloc": 0x001000,
        ".UPX0": 0x002000,
        ".UPX1": 0x004000,
        ".qtmetad": 0x008000,
        ".qtmimed4": 0x010000,
        ".pdata": 0x020000,
        ".xdata": 0x040000,
        ".CRT": 0x080000,
        ".debug": 0x100000,
    }

    # Assign a fresh dict rather than updating in place: entries left over
    # from an earlier encode must not survive into this result.
    self.encoded_sec_info = {
        section_name: {
            "encoded": section_encoding.get(section_name, 0x000000),
            "entropy": entropy,
        }
        for section_name, entropy in self.section_info.items()
    }

def decode_target_machine (self) -> None:
    """Decode ``self.encoded_target_machine`` back to a machine name.

    The name is stored in ``self.decoded_target_machine``. A value with no
    table entry, or an instance that has no ``encoded_target_machine``
    attribute at all, decodes to ``"UNKNOWN"``.
    """
    machine_decoding = {
        0x01: "UNKNOWN",
        0x02: "I386",
        0x04: "AMD64",
        0x08: "ARM64",
    }
    # None is not a key of the table, so a missing attribute takes the same
    # "UNKNOWN" fallback as an unrecognised value.
    encoded = getattr(self, 'encoded_target_machine', None)
    self.decoded_target_machine = machine_decoding.get(encoded, "UNKNOWN")

def decode_section_info (self) -> None:
    """Decode the one-hot section codes in ``self.encoded_sec_info``.

    ``self.decoded_sec_info`` is rebuilt on every call. Each key is the name
    recovered from the code and each value is
    ``{"original_name": <name>, "entropy": <entropy>}``. A code with no table
    entry keeps the original section name as its key.
    """
    section_decoding = {
        0x000001: ".text",
        0x000002: ".rdata",
        0x000004: ".data",
        0x000008: ".ndata",
        0x000010: ".rsrc",
        0x000020: ".itext",
        0x000040: ".bss",
        0x000080: ".idata",
        0x000100: ".didata",
        0x000200: ".edata",
        0x000400: ".tls",
        0x000800: ".buildid",
        0x001000: ".reloc",
        0x002000: ".UPX0",
        0x004000: ".UPX1",
        0x008000: ".qtmetad",
        0x010000: ".qtmimed4",
        0x020000: ".pdata",
        0x040000: ".xdata",
        0x080000: ".CRT",
        0x100000: ".debug",
    }

    decoded = {}
    for section_name, data in self.encoded_sec_info.items():
        # Fall back to the stored name when the code is not in the table.
        decoded_name = section_decoding.get(data["encoded"], section_name)
        decoded[decoded_name] = {
            "original_name": section_name,
            "entropy": data["entropy"],
        }
    self.decoded_sec_info = decoded


# === Do not edit anything below this line =========================================================
Expand Down
1 change: 0 additions & 1 deletion resources/GradeScript1a.py → GradeScript1a.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
Author: Drew Wheeler
Last Edit: 2024-08-23


This file was written for Lab 1a of the AI-Assisted Malware Analysis Project,
funded by the NSF (grant #2025682).

Expand Down
24 changes: 12 additions & 12 deletions resources/GradeScript1b.py → GradeScript1b.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ def setUp (self) -> None:
self.sha256_hash: str = "1df56772594a5ec2f550c7727a4879142736106da68b5d185c4391e08b48ec5e"
self.target_machine: str = "I386"
self.sections: dict[str, float] = {".text": 6.4723,
".rdata": 5.2098,
".data": 4.1106,
".ndata": 0.0000,
".rsrc": 5.7320}
self.enc_target_machine: str = "0x02"
self.enc_sections: dict[str, float] = {"0x000001": 6.4723,
"0x000002": 5.2098,
"0x000004": 4.1106,
"0x000008": 0.0000,
"0x000010": 5.7320}
".rdata": 5.2098,
".data": 4.1106,
".ndata": 0.0000,
".rsrc": 5.7320}
self.enc_target_machine: int = 0x02
self.enc_sections: dict[str, dict] = {".text": {"encoded": 0x000001, "entropy": 6.4723},
".rdata": {"encoded": 0x000002, "entropy": 5.2098},
".data": {"encoded": 0x000004, "entropy": 4.1106},
".ndata": {"encoded": 0x000008, "entropy": 0.0000},
".rsrc": {"encoded": 0x000010, "entropy": 5.7320}}
self.features: FeatureExtract = FeatureExtract()
self.features.extract_features (TEST_FILE)

Expand All @@ -47,14 +47,14 @@ def test_sections (self) -> None:

def test_encode_machine (self) -> None:
    """Check that encoding the target machine yields the expected value."""
    self.features.encode_target_machine()
    # Compare against the encoded attribute; target_machine still holds the
    # un-encoded name and can never equal the expected integer.
    self.assertEqual (self.enc_target_machine, self.features.encoded_target_machine)
    # Run the decoder as well; its result is not asserted in this test.
    self.features.decode_target_machine()

def test_encode_sections (self) -> None:
    """Check that encoding the section info yields the expected mapping."""
    self.features.encode_section_info()
    self.assertEqual (self.enc_sections, self.features.encoded_sec_info)
    # Run the decoder as well; its result is not asserted in this test.
    self.features.decode_section_info()

def test_decode_machine (self) -> None:
self.features.encode_target_machine()
self.features.decode_target_machine()
Expand Down
Loading