-
Notifications
You must be signed in to change notification settings - Fork 134
Expand file tree
/
Copy pathlibrary.py
More file actions
245 lines (206 loc) · 9.91 KB
/
library.py
File metadata and controls
245 lines (206 loc) · 9.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
from typing import Dict
from typing import List
from typing import Union
from .model import Block
from .model import DuplicateBlockKeyBlock
from .model import Entry
from .model import ExplicitComment
from .model import Field
from .model import ImplicitComment
from .model import ParsingFailedBlock
from .model import Preamble
from .model import String
# TODO Use functools.lru_cache for library properties (which create lists when called)
class Library:
    """A collection of parsed bibtex blocks.

    All blocks are kept in one ordered list. Entry and String blocks are
    additionally indexed by key (one index per type) so that a second block
    of the same type with an already-used key can be detected when added.
    """

    def __init__(self, blocks: Union[List[Block], None] = None):
        """Create a library, optionally pre-filled with blocks.

        :param blocks: Blocks to add right away; handled exactly like `add(blocks)`.
        """
        self._blocks = []
        self._entries_by_key = {}
        self._strings_by_key = {}
        if blocks is not None:
            self.add(blocks)

    def add(self, blocks: Union[List[Block], Block], fail_on_duplicate_key: bool = False):
        """Add blocks to library.

        The adding is key-safe, i.e., it is made sure that no duplicate keys are
        added for the same type (i.e., String or Entry). A block whose key is
        already taken is replaced with a DuplicateBlockKeyBlock, which is stored
        in its place.

        :param blocks: Block or list of blocks to add.
        :param fail_on_duplicate_key:
            If True, raises ValueError if a block was replaced with a
            DuplicateBlockKeyBlock. The blocks (including the duplicate
            markers) have already been added when the error is raised.
        :raises ValueError: If fail_on_duplicate_key is True and at least one
            duplicate key was found.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        duplicate_keys = []
        for block in blocks:
            # This may replace block with a DuplicateBlockKeyBlock
            added = self._add_to_dicts(block)
            self._blocks.append(added)
            if added is not block and isinstance(added, DuplicateBlockKeyBlock):
                duplicate_keys.append(added.key)

        if fail_on_duplicate_key and duplicate_keys:
            raise ValueError(
                f"Duplicate keys found: {duplicate_keys}. "
                "Duplicate entries have been added to the library as DuplicateBlockKeyBlock. "
                "Use `library.failed_blocks` to access them."
            )

    def remove(self, blocks: Union[List[Block], Block]):
        """Remove blocks from library.

        :param blocks: Block or list of blocks to remove.
        :raises ValueError: If block is not in library.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        for block in blocks:
            self._blocks.remove(block)
            # Keep the per-type key index in sync with the block list.
            if isinstance(block, Entry):
                del self._entries_by_key[block.key]
            elif isinstance(block, String):
                del self._strings_by_key[block.key]

    def replace(self, old_block: Block, new_block: Block, fail_on_duplicate_key: bool = True):
        """Replace a block with another block, at the same position.

        :param old_block: Block to replace.
        :param new_block: Block to replace with.
        :param fail_on_duplicate_key: If False, adds a DuplicateBlockKeyBlock if
            a block with new_block.key (other than old_block) already exists.
        :raises ValueError: If old_block is not in library or if fail_on_duplicate_key is True
            and a block with new_block.key (other than old_block) already exists.
        """
        try:
            index = self._blocks.index(old_block)
        except ValueError:
            raise ValueError("Block to replace is not in library.") from None
        self.remove(old_block)

        block_after_add = self._add_to_dicts(new_block)
        self._blocks.insert(index, block_after_add)

        if (
            new_block is not block_after_add
            and isinstance(block_after_add, DuplicateBlockKeyBlock)
            and fail_on_duplicate_key
        ):
            # Revert changes to old_block
            # Don't fail on duplicate key, as this would lead to an infinite recursion
            # (should never happen for a clean library, but could happen if the user
            # tampered with the internals of the library).
            self.replace(block_after_add, old_block, fail_on_duplicate_key=False)
            raise ValueError("Duplicate key found.")

    @staticmethod
    def _cast_to_duplicate(
        prev_block_with_same_key: Union[Entry, String], duplicate: Union[Entry, String]
    ):
        """Wrap `duplicate` in a DuplicateBlockKeyBlock.

        :param prev_block_with_same_key: Block that already occupies the key.
        :param duplicate: Newly added block carrying the same key.
        :returns: A DuplicateBlockKeyBlock built from the start line, raw text
            and key of `duplicate`, referencing both blocks.
        """
        # The message must be a single string: a trailing comma inside the
        # parentheses would turn it into a 1-tuple.
        assert isinstance(prev_block_with_same_key, type(duplicate)) or isinstance(
            duplicate, type(prev_block_with_same_key)
        ), (
            "Internal BibtexParser Error. Duplicate blocks share no common type. "
            f"Found {type(prev_block_with_same_key)} and {type(duplicate)}, but both should be "
            "either instance of String or instance of Entry. "
            "Please report this issue at the bibtexparser issue tracker."
        )

        assert (
            prev_block_with_same_key.key == duplicate.key
        ), "Internal BibtexParser Error. Duplicate blocks have different keys."

        return DuplicateBlockKeyBlock(
            start_line=duplicate.start_line,
            raw=duplicate.raw,
            key=duplicate.key,
            previous_block=prev_block_with_same_key,
            duplicate_block=duplicate,
        )

    def _add_to_dicts(self, block):
        """Safely add block references to private dict structures.

        Only Entry and String blocks are indexed; any other block is returned
        unchanged.

        :param block: Block to add.
        :returns: The block that was added to the library. If a block
            of same type and with same key already existed, a
            DuplicateBlockKeyBlock is returned (not added to dict).
        """
        if isinstance(block, Entry):
            blocks_by_key = self._entries_by_key
        elif isinstance(block, String):
            blocks_by_key = self._strings_by_key
        else:
            return block

        try:
            prev_block_with_same_key = blocks_by_key[block.key]
        except KeyError:
            # No duplicate found
            blocks_by_key[block.key] = block
            return block
        return self._cast_to_duplicate(prev_block_with_same_key, block)

    @property
    def blocks(self) -> List[Block]:
        """All blocks in the library, in library order.

        Note: this is the library's internal list, not a copy.
        """
        return self._blocks

    @property
    def failed_blocks(self) -> List[ParsingFailedBlock]:
        """All ParsingFailedBlock instances in the library, in library order."""
        return [b for b in self._blocks if isinstance(b, ParsingFailedBlock)]

    @property
    def duplicate_blocks(self) -> List[DuplicateBlockKeyBlock]:
        """All blocks that were added with an already-used key, in library order."""
        return [b for b in self._blocks if isinstance(b, DuplicateBlockKeyBlock)]

    @property
    def strings(self) -> List[String]:
        """All @string blocks in the library, in library order."""
        # Note: Taking this from the strings dict would be faster, but does not preserve order
        #   e.g. in cases where `replace` has been called.
        return [b for b in self._blocks if isinstance(b, String)]

    @property
    def strings_dict(self) -> Dict[str, String]:
        """Dict representation of all @string blocks in the library.

        Note: this is the library's internal index, not a copy
        (unlike `entries_dict`).
        """
        return self._strings_by_key

    @property
    def entries(self) -> List[Entry]:
        """All entry (@article, ...) blocks in the library, in library order."""
        # Note: Taking this from the entries dict would be faster, but does not preserve order
        #   e.g. in cases where `replace` has been called.
        return [b for b in self._blocks if isinstance(b, Entry)]

    @property
    def entries_dict(self) -> Dict[str, Entry]:
        """Dict representation of all entry blocks in the library (a shallow copy)."""
        return self._entries_by_key.copy()

    @property
    def preambles(self) -> List[Preamble]:
        """All @preamble blocks in the library, in library order."""
        return [block for block in self._blocks if isinstance(block, Preamble)]

    @property
    def comments(self) -> List[Union[ExplicitComment, ImplicitComment]]:
        """All comment blocks in the library, in library order."""
        return [
            block for block in self._blocks if isinstance(block, (ExplicitComment, ImplicitComment))
        ]

    @staticmethod
    def _as_match_set(value, case_sensitive: bool) -> set:
        """Turn a filter value or a field value into a set of comparable items.

        A list, tuple, set or frozenset contributes its elements; anything else
        is treated as a single item. Unless `case_sensitive` is True, items that
        are `str` are lower-cased; other items are kept as they are.
        """
        if isinstance(value, (list, tuple, set, frozenset)):
            items = value
        else:
            items = [value]
        if case_sensitive:
            return set(items)
        return {item.lower() if isinstance(item, str) else item for item in items}

    def filter(self, filter: Dict, case_sensitive: bool = False) -> List[Entry]:
        """Return the entries matching all criteria in `filter`.

        An entry matches if, for every key of `filter`, it has a Field under
        that key and the field's value (or, for a collection value, at least
        one of its elements) is among the accepted values for that key.
        Non-entry blocks are never returned. An empty `filter` matches every
        entry.

        :param filter: Maps field keys to an accepted value or a collection
            (list, tuple, set, frozenset) of accepted values. The dict passed
            in is not modified.
        :param case_sensitive: If False (default), `str` values are compared
            after lower-casing both sides.
        :returns: Matching entries, in library order.
        """
        # Normalise once, into a private dict, so the caller's dict stays untouched
        # and sets passed by the caller are case-folded like every other value.
        accepted_by_key = {
            key: self._as_match_set(value, case_sensitive) for key, value in filter.items()
        }

        matches = []
        for block in self._blocks:
            if not isinstance(block, Entry):
                continue
            fields = block.fields_dict  # looked up once per entry
            is_match = True
            for key, accepted in accepted_by_key.items():
                field = fields[key] if key in fields else None
                if not isinstance(field, Field) or accepted.isdisjoint(
                    self._as_match_set(field.value, case_sensitive)
                ):
                    is_match = False
                    break
            if is_match:
                matches.append(block)
        return matches