lib/mybibtex/generator.py at 5c17445a90de9dbb41eb018fc1eb60fb1ef150b4 · cryptobib/lib · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325


import codecs

from .database import *
from . import tools

import io
from abc import ABCMeta, abstractmethod
import logging

# WARNING: do not forget to set this variable to the correct config module when loading this module
# FIXME: need to remove this dirty hack!
# The code below reads config.confs, config.first_keys and config.types from it.
config = None

class EntryFilter(object, metaclass=ABCMeta):
    """ Composable entry filters: it is possible to compose a filter FilterA with a filter FilterB
    by instantiating FilterA(FilterB()). The inner filter (FilterB) is applied first and FilterA
    only sees the entries FilterB selected. """

    def __init__(self, filter_and=None):
        """
        @arg filter_and: optional inner filter applied before this one (None: no inner filter)
        """
        self.filter_and = filter_and

    @abstractmethod
    def is_selected(self, key, entry):
        """ Return true if the key, entry has to be selected and false otherwise"""
        pass

    def filter(self, entries):
        """ Generator over the (key, entry) pairs of the mapping `entries` selected by this filter
        (and by the inner filter, if any) """
        # Identity test on purpose: an inner filter overriding __eq__ must never be
        # mistaken for "no inner filter".
        if self.filter_and is None:
            candidates = iter(entries.items())
        else:
            candidates = self.filter_and.filter(entries)

        for (k, e) in candidates:
            if self.is_selected(k, e):
                yield (k, e)

class FilterPaper(EntryFilter):
    """ select papers and filters out conf: an entry is kept when its key has an author part """

    def is_selected(self, k, e):
        """ Return True iff `k.auth` is set (the entry `e` itself is not inspected) """
        # None is a singleton: compare by identity rather than with != (PEP 8)
        return k.auth is not None

class FilterConf(EntryFilter):
    """ Select the entries whose key carries a given conf key """

    def __init__(self, confkey, filter_and=None):
        """
        @arg confkey: the short name for the conf (EC, C, ...)
        @arg filter_and: optional inner filter, forwarded to EntryFilter
        """
        self.confkey = confkey
        super().__init__(filter_and=filter_and)

    def is_selected(self, k, e):
        """ Return True iff the conf key of `k` equals the one given at construction """
        wanted = self.confkey
        return k.confkey == wanted

class EntrySort(object, metaclass=ABCMeta):
    """ Base class of the entry sorters: subclasses only have to provide the sort key """

    @abstractmethod
    def key(self, ke):
        """ Return the sort key of `ke`, a (key, entry) pair """
        pass

    def sort(self, entries):
        """ Return a new list containing the items of the iterable `entries`,
        in ascending order of `self.key` """
        ordered = list(entries)
        ordered.sort(key=self.key)
        return ordered

class SortConfYearPage(EntrySort):
    """ Sort entries by conf, year (most recent first), dis of the crossref, eprint number,
    volume and number (both descending) and finally pages.

    The sort key is one formatted string (see `key`), so entries are ordered by the
    lexicographic order of these strings. """

    # howpublished values of the form "Cryptology ePrint Archive, Report <year>/<number>"
    pattern_eprint = re.compile(r"^Cryptology ePrint Archive, Report (\d*)/(\d*)")

    # Largest value handled by proc_int_descending. It has to be an exact integer:
    # the float expression 1e20-1 rounds back to 1e20, which made the value "0" map to a
    # 21-digit number sorting (as a string) before all the 20-digit ones.
    _INT_DESCENDING_MAX = 10**20 - 1

    def get_pages(self, key, entry):
        """ Return the pair (first page, last page) of `entry`, as strings.

        Previously pages were integers but because of LIPIcs, pages can now be of the
        form 5:1--5:10, hence the strings.
        A single page is returned twice. ("0", "0") is returned when there is no "pages"
        field, or (with a warning) when the field cannot be split into one or two parts.
        """
        if "pages" not in entry.fields:
            return ("0","0")

        try:
            pages = entry.fields["pages"].expand().split("--")
        except ValueError:
            logging.warning("Problem in entry \"{0}\": incorrect pages !".format(key))
            return ("0","0")

        if len(pages) == 1:
            return (pages[0], pages[0])
        if len(pages) == 2:
            return (pages[0], pages[1])

        logging.warning("Problem in entry \"{0}\": incorrect pages !".format(key))
        return ("0","0")

    def proc_year(self, year):
        """ Return an integer that decreases when the year (as converted by
        tools.short_to_full_year) increases """
        full_year = tools.short_to_full_year(year)
        return 9999-full_year # to reverse the order !

    def proc_confkey(self, confkey):
        """ Return the sort string of a conf key: "a-" + name for the confs of type "conf",
        "b-" + name for the other known confs, and "xxxxx" + confkey for the unknown ones """
        if confkey in config.confs:
            return ("a-" if config.confs[confkey]["type"] == "conf" else "b-") + config.confs[confkey]["name"]
        else:
            return "xxxxx" + confkey

    def proc_dis(self, e):
        """ Return the dis part of the key crossrefed by `e`, or "" if `e` has no crossref """
        if "crossref" not in e.fields:
            return ""
        else:
            return EntryKey.from_string(e.fields["crossref"].expand()).dis

    def proc_eprint(self, e):
        """ This function is used to sort correctly eprint: it returns "YYYY/NNNNN"
        (zero-padded) for an eprint and "xxxx/xxxxx" for anything else """
        empty = "xxxx/xxxxx"
        if "howpublished" not in e.fields:
            return empty

        m = self.pattern_eprint.match(e.fields["howpublished"].expand())
        if not m:
            return empty # not an eprint

        return "{:0>4d}/{:0>5d}".format(int(m.group(1)), int(m.group(2)))

    def proc_int_descending(self, val: str):
        """ proc_int_descending is used to convert the value val into an integer so that
        when the value val decreases, the integer increases.
        Assumes that either val is always an integer or is always a non-integer string.
        The empty string is mapped to 0. """
        if val == "":
            return 0
        # isdecimal() and not isdigit(): isdigit() also accepts characters such as
        # superscript digits, on which int() raises ValueError
        if not val.isdecimal():
            # Convert the value into an integer, looking at the value as a big-endian byte-array
            val = int(codecs.encode(val.encode("utf8"), 'hex'), 16)
        return self._INT_DESCENDING_MAX - int(val)

    def proc_volume(self, e):
        """ Return the descending sort value of the "volume" field (0 if there is none) """
        if "volume" not in e.fields:
            return 0
        else:
            return self.proc_int_descending(e.fields["volume"].expand())

    def proc_number(self, e):
        """ Return the descending sort value of the "number" field (0 if there is none) """
        if "number" not in e.fields:
            return 0
        else:
            return self.proc_int_descending(e.fields["number"].expand())

    def key(self, ke):
        """ Return the sort key of the (key, entry) pair `ke`: a string made of, in this order,
        the conf, the reversed year, the dis of the crossref, the eprint number, the volume,
        the number, the first page and the last page """
        (k,e) = ke
        (p1, p2) = self.get_pages(k,e)

        return "{:<15}-{:0>4d}-{:<10}-{}-{:>10}-{:0>10d}-{:0>20}-{:0>20}".format(
            self.proc_confkey(k.confkey),
            self.proc_year(k.year),
            self.proc_dis(e),
            self.proc_eprint(e),
            self.proc_volume(e),
            self.proc_number(e),
            p1,
            p2
        )

def bibtex_entry_format_fields(db, key, entry, expand_crossrefs=False, expand_values=False):
    """ Return a dictionary of formatted fields

    The result is built from a copy of entry.fields:
    - each role of entry.persons missing from the fields is added as a list of names
      joined by " and ",
    - the "author" field is re-formatted with one author per line,
    - if expand_crossrefs is True and there is a "crossref" field, the fields of the
      crossrefed entry of db (except "key") are merged in (the fields of the entry itself
      win) and the "crossref" field is removed.

    The arguments key and expand_values are not used here.
    """

    def persons_to_value(people):
        names = [str(someone) for someone in people]
        return Value([ValuePartQuote((" and ").join(names))])

    def author_to_value(author):
        # one author per line; 18 spaces presumably align the continuation lines with the
        # field values written by bibtex_write_entry -- TODO confirm
        separator = " and\n" + " "*18
        return Value([ValuePartQuote(author.expand().replace(" and ", separator), normalize=False)])

    result = entry.fields.copy()

    # expand persons
    for (role, people) in entry.persons.items():
        if role in result:
            continue
        result[role] = persons_to_value(people)

    # format author
    if "author" in result:
        result["author"] = author_to_value(result["author"])

    # expand crossrefs
    if expand_crossrefs and "crossref" in result:
        target = EntryKey.from_string(result["crossref"].expand())
        inherited = db.entries[target].fields.copy()
        del inherited["key"] # a bit of a hack TODO...
        merged = dict(list(inherited.items()))
        merged.update(list(result.items()))
        result = merged
        del result["crossref"]

    return result

def bibtex_write_entry(out, db, key, entry, expand_crossrefs=False, expand_values=False, remove_empty_fields=False):
    """ Write a bibtex entry in out

    The fields come from bibtex_entry_format_fields. They are written one per line, the
    ones listed in config.first_keys first (in that order), the others after, by name.
    Values are written in ASCII: other characters are replaced with '?' (with a warning).
    If remove_empty_fields is True, the fields whose expanded value is "" are skipped.
    No newline is written after the closing brace.
    """

    def field_rank(name):
        # "NNN:name", where NNN is the position in config.first_keys (or its length when absent)
        if name in config.first_keys:
            position = config.first_keys.index(name)
        else:
            position = len(config.first_keys)
        return "{0:03d}:{1}".format(position, name)

    def pretty_type(type_):
        # configured spelling of the entry type if there is one, capitalized type otherwise
        lowered = type_.lower()
        return config.types[lowered] if lowered in config.types else type_.capitalize()

    fields = bibtex_entry_format_fields(db, key, entry, expand_crossrefs, expand_values)

    # actual writing
    out.write("@{0}{{{1},\n".format(pretty_type(entry.type), str(key)))

    for name in sorted(fields.keys(), key=field_rank):
        # remove empty fields after expansion
        if remove_empty_fields and fields[name].to_bib(expand = True) == '""':
            continue

        text = fields[name].to_bib(expand = expand_values)

        # ascii_text only contains ascii characters
        try:
            ascii_text = text.encode("ascii").decode("ascii")
        except UnicodeEncodeError:
            logging.warning("Problem of encoding in entry \"{0}\", key \"{1}\", value \"{2}\" -> replace bad caracter(s) with '?'".format(key,name,repr(text)))
            ascii_text = text.encode("ascii", "replace").decode("ascii")

        out.write("  {0:<15}{1},\n".format((name + " ="), ascii_text ))

    out.write("}")

def bibtex_write_entries(out, db, entries, *args, **kwargs):
    """ internal function used to write bibtex entries: every (key, entry) pair of the
    iterable entries is written with bibtex_write_entry (which receives the extra
    arguments) and is followed by an empty line """

    for pair in entries:
        entry_key, entry = pair
        bibtex_write_entry(out, db, entry_key, entry, *args, **kwargs)
        out.write("\n\n")


def bibtex_gen(out, db, entry_filter=FilterPaper(), entry_sort=SortConfYearPage(), expand_crossrefs=False, include_crossrefs=False, *args, **kwargs):
    """
    Generate bibtex file: write in out the entries of db selected by entry_filter,
    in the order given by entry_sort

    Options:
    @arg expand_crossrefs: expand crossrefs inside entries instead of keeping the crossref field if True,
    @arg include_crossrefs: include crossrefs in the output if True and expand_crossrefs=False,
    @arg expand_values: expand values (using macros) if True
    @arg remove_empty_fields: remove empty fields if True, empty fields are ones that are either empty or expand to an empty value
      (in case expand_values=False and multiple macros values may be used using, e.g., multiple "abbrev*.bib" files, be extra careful)

    The crossrefed entries, when included, are written after all the selected entries.
    """
    selected = dict(entry_filter.filter(db.entries))
    bibtex_write_entries(
        out,
        db,
        entry_sort.sort(iter(selected.items())),
        expand_crossrefs=expand_crossrefs,
        *args, **kwargs
    )

    if not (expand_crossrefs==False and include_crossrefs==True):
        return

    # collect each crossrefed entry once
    # works because an entry crossrefed cannot crossref another entry
    referenced = dict()
    for entry in selected.values():
        if "crossref" not in entry.fields:
            continue
        target = EntryKey.from_string(entry.fields["crossref"].expand())
        if target not in referenced:
            referenced[target] = db.entries[target]

    bibtex_write_entries(
        out,
        db,
        entry_sort.sort(iter(referenced.items())),
        expand_crossrefs=expand_crossrefs,
        *args, **kwargs
    )

def bibtex_gen_str(db, *args, **kwargs):
    """ Same as bibtex_gen (which receives db and all the other arguments), except that
    the generated bibtex is returned as a string instead of being written in a stream """
    with io.StringIO() as buffer:
        bibtex_gen(buffer, db, *args, **kwargs)
        return buffer.getvalue()


def sql_write_entry(out, entry, crossref=None):
    """ write entry for an entry in web2py sqlite (entry is a row corresponding to an entry)

    @arg out: stream in which the bibtex entry is written (no newline after the closing brace)
    @arg entry: row of the entry; its key_conf, key_year, key_auth and key_dis columns
      give the bibtex key, and its type column gives the entry type
    @arg crossref: if None, does nothing, otherwise, merge fields in entry: the "crossref"
      field is dropped and every field that is None in entry takes the value found in crossref

    The fields that are None are not written. The other ones are written in ASCII:
    other characters are replaced with '?' (with a warning).
    """
    def key_sort(name):
        # "NNN:name", where NNN is the position in config.first_keys (or its length when absent)
        if name in config.first_keys:
            return "{0:03d}:{1}".format(config.first_keys.index(name),name)
        else:
            return "{0:03d}:{1}".format(len(config.first_keys),name)

    def format_type(type_):
        # configured spelling of the entry type if there is one, capitalized type otherwise
        if type_.lower() in config.types:
            return config.types[type_.lower()]
        else:
            return type_.capitalize()

    # only the last two digits of the year are used in the key
    key = EntryKey(entry.key_conf, entry.key_year%100, entry.key_auth, entry.key_dis)

    fields = entry.as_dict().copy()

    # None is a singleton: all the tests below compare by identity (PEP 8)
    if crossref is not None:
        del fields["crossref"]
        fields = {k: v if v is not None else crossref[k] for (k,v) in fields.items()}

    # NOTE(review): a lone start page is written as is, while a range is written between
    # double quotes -- confirm that start_page is always a plain number
    if fields["start_page"] is not None:
        if fields["end_page"] is None:
            fields["pages"] = str(fields["start_page"])
        else:
            fields["pages"] = '"{}--{}"'.format(fields["start_page"], fields["end_page"])

    # columns that are not bibtex fields (they are part of the key, the type or the pages)
    for column in ("id", "key_conf", "key_year", "key_auth", "key_dis", "type", "start_page", "end_page"):
        del fields[column]

    # actual writing
    out.write("@{0}{{{1},\n".format(format_type(entry.type), str(key)))

    for k in sorted(fields.keys(), key=key_sort):
        v = fields[k]
        if v is None:
            continue
        v = str(v)

        try:
            v_ascii = v.encode("ascii").decode()
        except UnicodeEncodeError:
            logging.warning("Problem of encoding in entry \"{0}\", key \"{1}\", value \"{2}\" -> replace bad caracter(s) with '?'".format(key,k,repr(v)))
            v_ascii = v.encode("ascii", "replace").decode()

        out.write("  {0:<15}{1},\n".format((k + " ="), v_ascii))

    out.write("}")