|
| 1 | +#! /usr/bin/env python2 |
| 2 | + |
| 3 | + |
| 4 | +""" |
| 5 | +Copyright (C) 2009-2015 Gustavo de Oliveira. Licensed under the GPL (see the |
| 6 | +license file). |
| 7 | +
|
| 8 | +This program reads a BibTeX file and converts it to a list of references in |
| 9 | +HTML format. |
| 10 | +
|
| 11 | +To use this program you need Python installed on your computer. |
| 12 | +
|
| 13 | +To run the program, in a command-line interface enter the command |
| 14 | +
|
| 15 | + python bibtex2html.py bibtex.bib template.html output.html |
| 16 | +
|
| 17 | +Here, `bibtex.bib` is the BibTeX file that you want to convert, and |
| 18 | +`template.html` is any template file containing the following placeholders: |
| 19 | +
|
| 20 | + <!--NUMBER_OF_REFERENCES--> |
| 21 | + <!--NEWER--> |
| 22 | + <!--OLDER--> |
| 23 | + <!--DATE--> |
| 24 | + <!--LIST_OF_REFERENCES--> |
| 25 | +
|
| 26 | +These placeholders will be replaced by the program, and the result will be |
| 27 | +written to the file `output.html`. |
| 28 | +""" |
| 29 | + |
| 30 | + |
| 31 | +import sys |
| 32 | +from datetime import date |
| 33 | + |
| 34 | + |
def cleanup_author(s):
    """Clean up and format author names.

    cleanup_author(str) -> str

    LaTeX accent macros are converted to the accented characters, grouping
    braces and periods are removed, runs of whitespace are collapsed, and
    the BibTeX separator " and " is turned into commas except between the
    last two names, so that "A and B and C" becomes "A, B and C".
    """

    # An ordered sequence (not a dict) so that the result never depends on
    # the iteration order of the interpreter in use.
    accents = (
        ('\\"a', 'ä'), ('\\"A', 'Ä'), ('\\"e', 'ë'), ('\\"E', 'Ë'),
        ('\\"i', 'ï'), ('\\"I', 'Ï'), ('\\"o', 'ö'), ('\\"O', 'Ö'),
        ('\\"u', 'ü'), ('\\"U', 'Ü'),
        ("\\'a", 'á'), ("\\'A", 'Á'), ("\\'e", 'é'), ("\\'E", 'É'),
        ("\\'i", 'í'), ("\\'I", 'Í'), ("\\'o", 'ó'), ("\\'O", 'Ó'),
        ("\\'u", 'ú'), ("\\'U", 'Ú'),
        ('\\~n', 'ñ'), ('\\~N', 'Ñ'), ('\\~a', 'ã'), ('\\~A', 'Ã'),
        ('\\~o', 'õ'), ('\\~O', 'Õ'),
    )

    # Strip grouping braces first: both {\"o} and \"{o} then reduce to \"o,
    # which the accent table below can match.
    s = s.replace('{', '').replace('}', '')

    for macro, char in accents:
        s = s.replace(macro, char)

    # Remove a leftover \'\ sequence (presumably what remains of an accent
    # placed over a dotless letter such as \'\i).
    s = s.replace("\\'\\", '')

    # Periods become spaces; split/join then collapses the resulting runs
    # of whitespace and trims both ends.
    s = ' '.join(s.replace('.', ' ').split())

    s = s.replace(' And ', ' and ')

    # Keep only the last " and "; every earlier one becomes a comma.
    before, sep, after = s.rpartition(' and ')
    return before.replace(' and ', ', ') + sep + after
| 61 | + |
| 62 | + |
def cleanup_title(s):
    """Clean up and format article titles.

    cleanup_title(str) -> str

    The title is converted to sentence case: everything is lowercased and
    the first character is capitalized.  Text enclosed in braces, which is
    how BibTeX marks capitalization that must be preserved (for example
    "{H}ilbert"), keeps its case.  The braces themselves are removed.  A
    closing brace without a matching opening brace is simply dropped.
    """

    runs = []   # (text, protected) pairs, in order of appearance
    depth = 0
    start = 0

    # The appended '}' is a sentinel that flushes the final run of text; at
    # depth 0 it has no other effect.
    for pos, char in enumerate(s + '}'):
        if char != '{' and char != '}':
            continue
        if pos > start:
            runs.append((s[start:pos], depth > 0))
        start = pos + 1
        if char == '{':
            depth += 1
        elif depth > 0:
            depth -= 1

    text = ''.join(run if protected else run.lower()
                   for run, protected in runs)

    # Capitalize the first character unless the author protected it.
    if runs and not runs[0][1]:
        text = text[:1].capitalize() + text[1:]

    return text
| 73 | + |
| 74 | + |
def cleanup_page(s):
    """Clean up the article page string.

    cleanup_page(str) -> str

    Any run of two or more hyphens (the LaTeX dashes "--" and "---") is
    reduced to a single hyphen, so "123--145" becomes "123-145".
    """

    # Repeat until stable: a single pass would turn "---" into "--".
    while '--' in s:
        s = s.replace('--', '-')

    return s
| 84 | + |
| 85 | + |
| 86 | + |
###########################################################################
# Set the fields to be exported to html (following this order)
mandatory = ['author', 'title']
optional = ['journal', 'eprint', 'volume', 'pages', 'year', 'url', 'doi']
###########################################################################

# Characters trimmed from both ends of field names and values.
_STRIP_CHARS = ' ,\n\t{}'


def _read_bibtex(path):
    """Return the BibTeX file as one string, without the commented lines."""

    with open(path, 'r') as handle:
        lines = [line.strip() for line in handle]

    # Lines starting with '%%' are comments.  The remaining lines are joined
    # with a space so that words on either side of a line break stay apart.
    return ' '.join(line for line in lines if line and line[:2] != '%%')


def _split_fields(body):
    """Split the body of one BibTeX entry into raw "key = value" strings."""

    fields = []
    depth = 0           # current brace nesting level
    in_quotes = False   # inside a "..." value (quotes only count at depth 0)
    seen_brace = False  # an opening brace was met since the last cut
    start = 0
    previous = ''

    for pos, char in enumerate(body):
        cut = False
        if char == '"' and depth == 0 and previous != '\\':
            in_quotes = not in_quotes
        elif char == '{':
            depth += 1
            seen_brace = True
        elif char == '}':
            # A stray closing brace must not push the depth below zero.
            if depth > 0:
                depth -= 1
            # A braced value ends where its outermost brace closes.
            cut = depth == 0 and seen_brace and not in_quotes
        elif char == ',' and depth == 0 and not in_quotes:
            # A top-level comma ends an unbraced value such as "year = 2010".
            cut = True

        if cut:
            fields.append(body[start:pos + 1])
            start = pos + 1
            seen_brace = False
        previous = char

    # Whatever follows the last cut may be a final field without a comma.
    fields.append(body[start:])

    stripped = [field.strip(' ,\t\n') for field in fields]
    return [field for field in stripped if field]


def _parse_entry(chunk):
    """Turn the text that follows one '@' into a {field: value} dict."""

    kind, _, rest = chunk.partition('{')
    ident, _, body = rest.partition(',')
    body = body.rpartition('}')[0]

    entry = {'type': kind.strip(_STRIP_CHARS).lower(),
             'id': ident.strip(_STRIP_CHARS)}

    for field in _split_fields(body):
        key, sep, value = field.partition('=')
        if not sep:
            continue
        key = key.strip(_STRIP_CHARS).lower()
        # A field called "type" or "id" must not replace the entry's own
        # type and citation key.
        if key in ('type', 'id'):
            continue
        value = value.strip(' ,\n\t')
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        entry[key] = value.strip(_STRIP_CHARS)

    return entry


def parse_bibtex(text):
    """Parse BibTeX source text into a list of dicts, one per entry.

    parse_bibtex(str) -> list of dict

    Every dict has the keys 'type' (lowercased entry type) and 'id' (the
    citation key), plus one lowercased key per field.  Values may be given
    in braces, in double quotes, or bare.
    """

    # NOTE: entries are found by splitting at every '@', so an '@' inside a
    # field value (an e-mail address, for instance) starts a bogus entry.
    return [_parse_entry(chunk) for chunk in text.split('@') if chunk.strip()]


def _entry_year(entry):
    """Return the year of an entry as an int, or None if missing/invalid."""

    try:
        return int(entry['year'])
    except (KeyError, ValueError):
        return None


def _doi_url(doi):
    """Return a link for a DOI; values that already are URLs are kept."""

    if doi.startswith(('http://', 'https://')):
        return doi
    return 'https://doi.org/' + doi


def _render_entry(entry):
    """Return the html paragraph for a single reference."""

    fields = [entry[key] for key in mandatory]
    if entry.get('url'):
        parts = ['<p>{0}, <a href="{2}" target="_blank"><i>{1}</i></a>'
                 .format(*(fields + [entry['url']]))]
    else:
        parts = ['<p>{0}, <i>{1}</i>'.format(*fields)]

    for name in optional:
        if name not in entry:
            continue
        value = entry[name]
        if name == 'journal':
            parts.append(', {0}'.format(value))
        elif name == 'eprint':
            parts.append(':{0}'.format(value))
        elif name == 'volume':
            parts.append(' <b>{0}</b>'.format(value))
        elif name == 'pages':
            parts.append(', {0}'.format(cleanup_page(value)))
        elif name == 'year':
            parts.append(', {0}'.format(value))
        elif name == 'doi':
            parts.append(' <a href="{0}" target="_blank">[doi]</a>'
                         .format(_doi_url(value)))

    parts.append('</p>\n')
    return ''.join(parts)


def render_references(entries):
    """Render the references grouped by year, newest year first.

    render_references(list of dict) -> (str, int)

    Returns the html code and the number of references it contains.
    Entries without a usable integer year are left out.  Within a year the
    entries keep the order in which they were given.
    """

    by_year = {}
    for entry in entries:
        year = _entry_year(entry)
        if year is not None:
            by_year.setdefault(year, []).append(entry)

    chunks = []
    counter = 0
    for year in sorted(by_year, reverse=True):
        chunks.append('<h3 id="y{0}">{0}</h3>\n\n\n<ul>\n'.format(year))
        for entry in by_year[year]:
            chunks.append(_render_entry(entry))
            counter += 1
        chunks.append('</ul>\n')

    return ''.join(chunks), counter


def _fill_placeholders(text, counter, older, newer):
    """Replace the summary placeholders found in a piece of the template."""

    text = text.replace('<!--NUMBER_OF_REFERENCES-->', str(counter), 1)
    text = text.replace('<!--NEWER-->', str(newer), 1)
    text = text.replace('<!--OLDER-->', str(older), 1)
    return text.replace('<!--DATE-->', date.today().strftime('%d %b %Y'))


def main(argv=None):
    """Convert a BibTeX file to html using a template.

    main(list of str) -> None

    `argv` holds the BibTeX, template and output file names, in this order;
    when omitted, the command-line arguments are used.  Exits with a usage
    message if fewer than three names are given.
    """

    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 3:
        sys.exit('usage: python bibtex2html.py bibtex.bib template.html '
                 'output.html')
    bibfile, templatefile, outputfile = argv[:3]

    # Read the template and BibTeX files
    with open(templatefile, 'r') as f:
        template = f.read()
    entries = parse_bibtex(_read_bibtex(bibfile))

    # Keep only articles that have a non-empty author and title
    entries = [e for e in entries
               if e['type'] in ('article', 'inproceedings')
               and e.get('author') and e.get('title')]

    # Clean up data
    for entry in entries:
        entry['author'] = cleanup_author(entry['author'])
        entry['title'] = cleanup_title(entry['title'])

    # Oldest and newest year; left blank when no entry has a usable year
    years = sorted(y for y in map(_entry_year, entries) if y is not None)
    if years:
        older, newer = years[0], years[-1]
    else:
        older = newer = ''

    html, counter = render_references(entries)

    # Fill the placeholders on both sides of the list marker, so that they
    # also work in a footer, and put the list where the marker was.
    head, _, tail = template.partition('<!--LIST_OF_REFERENCES-->')
    head = _fill_placeholders(head, counter, older, newer)
    tail = _fill_placeholders(tail, counter, older, newer)

    # Write the final result to the output file
    with open(outputfile, 'w') as f:
        f.write(head + html + tail)


if __name__ == '__main__':
    main()
0 commit comments