Skip to content

Commit f77bee7

Browse files
authored
Merge pull request #1 from noah-13/adpate-to-windows
Add Windows compatibility
2 parents 4391732 + dd0c72e commit f77bee7

7 files changed

Lines changed: 143 additions & 58 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ dmypy.json
152152
# Cython debug symbols
153153
cython_debug/
154154

155+
# Test file
156+
test.ipynb
157+
158+
155159
# PyCharm
156160
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157161
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore

documentation/general_example.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@
121121
"\n",
122122
"patterns = premise.find_patterns(premise_instances)\n",
123123
"for p in patterns:\n",
124-
" print(p)"
124+
" print(p)\n"
125125
]
126126
},
127127
{
@@ -134,9 +134,9 @@
134134
],
135135
"metadata": {
136136
"kernelspec": {
137-
"display_name": "pypremise",
137+
"display_name": "pypremise (3.13.3)",
138138
"language": "python",
139-
"name": "myenv"
139+
"name": "python3"
140140
},
141141
"language_info": {
142142
"codemirror_mode": {
@@ -148,7 +148,7 @@
148148
"name": "python",
149149
"nbconvert_exporter": "python",
150150
"pygments_lexer": "ipython3",
151-
"version": "3.12.3"
151+
"version": "3.13.3"
152152
}
153153
},
154154
"nbformat": 4,

documentation/missclassification_examples.ipynb

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 2,
25+
"execution_count": 1,
2626
"id": "e88fc2bd-0e68-4c12-90e4-932e2e550c4c",
2727
"metadata": {},
2828
"outputs": [],
@@ -66,7 +66,7 @@
6666
},
6767
{
6868
"cell_type": "code",
69-
"execution_count": 3,
69+
"execution_count": 2,
7070
"id": "020b3ae0",
7171
"metadata": {},
7272
"outputs": [
@@ -98,7 +98,7 @@
9898
},
9999
{
100100
"cell_type": "code",
101-
"execution_count": 4,
101+
"execution_count": 3,
102102
"id": "1de56eba-f4d8-4350-83ff-9b6d09a001d9",
103103
"metadata": {},
104104
"outputs": [],
@@ -123,7 +123,7 @@
123123
},
124124
{
125125
"cell_type": "code",
126-
"execution_count": 5,
126+
"execution_count": 4,
127127
"id": "4a63c48e-db46-4247-9044-93aed2302324",
128128
"metadata": {},
129129
"outputs": [
@@ -158,7 +158,7 @@
158158
},
159159
{
160160
"cell_type": "code",
161-
"execution_count": 6,
161+
"execution_count": 5,
162162
"id": "5e4d532d-d70c-48e9-bdbb-70cadf7ddf1a",
163163
"metadata": {},
164164
"outputs": [],
@@ -203,7 +203,7 @@
203203
},
204204
{
205205
"cell_type": "code",
206-
"execution_count": 7,
206+
"execution_count": 6,
207207
"id": "6c73c738-3ffc-4b01-b5b7-022a1ff7c652",
208208
"metadata": {},
209209
"outputs": [],
@@ -216,7 +216,7 @@
216216
],
217217
"metadata": {
218218
"kernelspec": {
219-
"display_name": "pypremise (3.12.3)",
219+
"display_name": "pypremise (3.13.3)",
220220
"language": "python",
221221
"name": "python3"
222222
},
@@ -230,7 +230,7 @@
230230
"name": "python",
231231
"nbconvert_exporter": "python",
232232
"pygments_lexer": "ipython3",
233-
"version": "3.12.3"
233+
"version": "3.13.3"
234234
}
235235
},
236236
"nbformat": 4,

pyproject.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,11 @@ packages = ["pypremise"]
1919

2020

2121
[tool.setuptools.package-dir]
22-
"" = "src"
22+
"" = "src"
23+
24+
[tool.setuptools.package-data]
25+
pypremise = [
26+
"Premise_AppleSilicon",
27+
"Premise_Linux",
28+
"Premise_Windows.exe",
29+
]

src/pypremise/Premise_Windows.exe

9.55 MB
Binary file not shown.

src/pypremise/core.py

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import tempfile
2626
import time
2727
import logging
28+
import os
2829

2930
logger = logging.getLogger(__name__)
3031

@@ -106,51 +107,90 @@ def __init__(self, voc_index_to_token: Optional[Mapping[int, str]] = None,
106107
self.group_1_name = group_1_name
107108
self.premise_engine = premise_engine
108109

110+
111+
112+
109113
def find_patterns(self, instances: List[PremiseInstance]):
    """
    Runs the Premise program on the given instances and returns its results.

    The Premise executable exchanges data through files: the instances (and,
    if configured, the embeddings) are written to temporary files, the
    executable is called, and the result file it wrote is parsed. The
    temporary files are removed afterwards, also if one of the steps raises.

    :param instances: the instances to analyse.
    :return: the results parsed from Premise's result file. If
             ``voc_index_to_token`` is set, ``_pattern_indices_to_tokens``
             is applied to them before they are returned.
    """
    # imported here rather than at module level, as in the original code
    import pypremise.io

    temp_paths = []

    def new_temp_path() -> str:
        # Create the file and close it right away: whether a file that is
        # still open can be opened a second time by name is platform
        # dependent (it cannot on Windows).
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            name = tmp.name
        # absolute path with forward slashes, independent of the platform
        path = os.path.abspath(name).replace("\\", "/")
        temp_paths.append(path)
        return path

    def safe_remove(path: str) -> None:
        # Best-effort removal: a PermissionError is retried once after a
        # short pause; if the file still cannot be removed this is logged
        # instead of being silently ignored.
        for attempt in (1, 2):
            try:
                os.remove(path)
                return
            except FileNotFoundError:
                return
            except PermissionError as e:
                if attempt == 1:
                    time.sleep(0.2)
                else:
                    logger.warning(f"Could not remove temporary file '{path}': {e}")

    try:
        feature_path = new_temp_path()
        label_path = new_temp_path()
        result_path = new_temp_path()

        pypremise.io.write_dat_content(instances, feature_path, label_path)

        # === embeddings ===
        if self.embedding_index_to_vector is not None:
            embedding_path = new_temp_path()
            max_feature_index = Premise._get_max_feature_index(instances)
            pypremise.io.write_embedding_file(
                self.embedding_index_to_vector,
                embedding_path,
                self.embedding_dimensionality,
                max_feature_index,
            )
        else:
            # an empty string is passed to Premise when there are no embeddings
            embedding_path = ""

        # === call Premise C++ program ===
        # Resolve the engine into an executable path without modifying
        # self.premise_engine. Anything that is not an existing file (None
        # or an engine name) is resolved by get_premise_path.
        engine = self.premise_engine
        if engine is None or not os.path.isfile(engine):
            engine = pypremise.io.get_premise_path(engine)

        start_time = time.time()
        pypremise.io.call_premise_program(
            feature_path, label_path, result_path, embedding_path,
            self.embedding_dimensionality, self.max_neighbor_distance,
            self.fisher_p_value, self.clause_max_overlap, self.min_overlap,
            engine,
        )
        logger.info(f"Premise ran for {time.time() - start_time:.2f} seconds.")

        # === check result file ===
        try:
            size = os.path.getsize(result_path)
        except OSError as e:
            logger.error(f"Could not stat result file: {e}")
        else:
            logger.info(f"Result file size: {size} bytes")
            if size == 0:
                logger.warning("Result file is empty — check Premise stderr or parameters.")

        # === analyse results ===
        results = pypremise.io.parse_premise_result(
            result_path, self.group_0_name, self.group_1_name
        )
    finally:
        # clean up every temporary file that was created, also on failure
        for path in temp_paths:
            safe_remove(path)

    # if we have a map from indices to tokens, use it to convert the pattern
    # indices to tokens
    if self.voc_index_to_token is not None:
        self._pattern_indices_to_tokens(results)

    return results
153191

192+
193+
154194
def _pattern_indices_to_tokens(self, results: List[PremiseResult]):
155195
"""
156196
Converts the features in the given patterns from their index representation to their token representation.

src/pypremise/io.py

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
PREMISE_ENGINE_NAME_APPLE_SILICON = "AppleSilicon"
3636
PREMISE_LINUX_FILENAME = "Premise_Linux"
3737
PREMISE_APPLESILICON_FILENAME = "Premise_AppleSilicon"
38+
PREMISE_ENGINE_NAME_WINDOWS = "Premise_Windows"
39+
PREMISE_WINDOWS_FILENAME = "Premise_Windows.exe"
40+
3841

3942
logger = logging.getLogger(__name__)
4043

@@ -87,35 +90,62 @@ def write_embedding_file(embedding_index_to_vector, embedding_path: str, embeddi
8790
out_file.flush()
8891

8992

90-
def call_premise_program(feature_path: str, label_path: str, result_path: str, embedding_path: str,
                         embedding_size: int, neighbor_max_distance: int, fisher_p_value: float,
                         clause_max_overlap: float, min_overlap: float,
                         premise_engine: Optional[str] = None) -> None:
    """
    Runs the Premise executable as a subprocess and waits for it to finish.

    :param feature_path: path passed to Premise as 1st argument (feature file).
    :param label_path: path passed to Premise as 2nd argument (label file).
    :param result_path: path passed to Premise as 3rd argument (result file).
    :param embedding_path: path passed to Premise as 4th argument (embedding file).
    :param embedding_size: passed to Premise as string.
    :param neighbor_max_distance: passed to Premise as string.
    :param fisher_p_value: passed to Premise as string.
    :param clause_max_overlap: passed to Premise as string.
    :param min_overlap: passed to Premise as string.
    :param premise_engine: either the path of an existing Premise executable,
                           or an engine name / None, which is resolved with
                           get_premise_path.
    :raises Exception: if the executable can not be started or if it exits
                       with a non-zero return code.
    """
    # same logger object as the module-level one (same name)
    logger = logging.getLogger(__name__)

    # Callers may hand in an already resolved executable path. Everything
    # else (an engine name or None) is resolved by get_premise_path.
    if premise_engine is not None and os.path.isfile(premise_engine):
        premise_path = premise_engine
    else:
        premise_path = get_premise_path(premise_engine)

    cmd = [
        premise_path,
        feature_path,
        label_path,
        result_path,
        embedding_path,
        str(embedding_size),
        str(neighbor_max_distance),
        str(fisher_p_value),
        str(clause_max_overlap),
        str(min_overlap),
    ]

    logger.info("Starting Premise. This might take a while.")
    logger.info("Premise command: " + " ".join(cmd))

    # Only starting/running the process is guarded here, so that the
    # exit-code error below is not wrapped a second time.
    try:
        # stdout and stderr are collected together while the process runs,
        # so a full pipe can not block the child process.
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except Exception as e:
        raise Exception(f"Execution of Premise failed due to '{e}'.") from e

    if completed.stdout:
        logger.info("Premise stdout:\n" + completed.stdout)
    if completed.stderr:
        logger.error("Premise stderr:\n" + completed.stderr)

    if completed.returncode != 0:
        raise Exception(
            f"Premise exited with code {completed.returncode}. "
            f"stderr:\n{completed.stderr}"
        )
148+
119149

120150

121151
def get_premise_path(premise_engine: Optional[str] = None):
@@ -158,6 +188,8 @@ def get_premise_path(premise_engine: Optional[str] = None):
158188
premise_engine = PREMISE_ENGINE_NAME_LINUX
159189
elif try_premise(os.path.join(module_path, PREMISE_APPLESILICON_FILENAME)):
160190
premise_engine = PREMISE_ENGINE_NAME_APPLE_SILICON
191+
elif try_premise(os.path.join(module_path, PREMISE_WINDOWS_FILENAME)):
192+
premise_engine = PREMISE_ENGINE_NAME_WINDOWS
161193

162194
if premise_engine != None:
163195
# we found one! Let's try to store it
@@ -179,6 +211,8 @@ def get_premise_path(premise_engine: Optional[str] = None):
179211
path = os.path.join(module_path, PREMISE_LINUX_FILENAME)
180212
elif premise_engine == PREMISE_ENGINE_NAME_APPLE_SILICON:
181213
path = os.path.join(module_path, PREMISE_APPLESILICON_FILENAME)
214+
elif premise_engine == PREMISE_ENGINE_NAME_WINDOWS:
215+
path = os.path.join(module_path, PREMISE_WINDOWS_FILENAME)
182216
else:
183217
raise Exception(f"Unknown Premise engine '{premise_engine}'")
184218

0 commit comments

Comments
 (0)