-
Notifications
You must be signed in to change notification settings - Fork 431
Expand file tree
/
Copy pathreadqmd.lua
More file actions
292 lines (259 loc) · 8.79 KB
/
readqmd.lua
File metadata and controls
292 lines (259 loc) · 8.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
-- read qmd with quarto syntax extensions and produce quarto's extended AST
-- Copyright (C) 2023-2024 Posit Software, PBC
--
-- Originally by Albert Krewinkel
local md_shortcode = require("lpegshortcode")
local md_fenced_div = require("lpegfenceddiv")
-- Support the same format extensions as pandoc's Markdown reader.
-- NOTE(review): `Extensions` is assigned without `local`, so it is a global.
-- That is presumably intentional (pandoc custom readers appear to declare
-- their supported extensions through a global of this name) -- confirm
-- against the pandoc custom-reader docs before "fixing" it with `local`.
Extensions = pandoc.format.extensions 'markdown'
-- we replace invalid tags with random strings of the same size
-- to safely allow code blocks inside pipe tables
-- note that we can't use uppercase letters here
-- because pandoc canonicalizes classes to lowercase.
--- Build a string of `size` random lowercase ASCII letters.
-- Uses `math.random`, so the result depends on the current random state.
-- @tparam number size number of characters to generate
-- @treturn string a string made only of the letters "a".."z"
local function random_string(size)
  local alphabet = "abcdefghijklmnopqrstuvwxyz"
  local pieces = {}
  for i = 1, size do
    local pick = math.random(1, #alphabet)
    pieces[i] = alphabet:sub(pick, pick)
  end
  return table.concat(pieces)
end
--- Collect the distinct brace-delimited tags that follow an opening run of
-- backticks, e.g. the `{python}` in "```{python}".
-- A fence is recognised at the very start of the text or right after a
-- newline, optionally preceded by horizontal whitespace.
-- @tparam string str text to scan
-- @treturn table sequence of unique tags, in order of first appearance
local function find_invalid_tags(str)
  -- Character class used inside the braces: [^.=\n\r]
  --   "."  is excluded so that {.python} is not caught
  --   "="  is excluded so that {foo="bar"} is not caught
  --   "\n" and "\r" are excluded so a tag never spans lines
  -- Lua patterns have no alternation, hence one pattern per fence position.
  -- %s would also match \n and \r, so horizontal whitespace is spelled out
  -- as [ \t\f\v] (cf. isspace in the C standard, 7.4.1.10).
  local fence_patterns = {
    "^[ \t\f\v]*(```+[ \t\f\v]*)(%{+[^.=\n\r]*%}+)",
    "\n[ \t\f\v]*(```+[ \t\f\v]*)(%{+[^.=\n\r]+%}+)"
  }

  -- Try each pattern in turn from offset `from`; yield the end offset and
  -- the tag of the first pattern that matches, or nil when none does.
  local function next_tag(from)
    for i = 1, #fence_patterns do
      local first, last, _, tag = str:find(fence_patterns[i], from)
      if first ~= nil then
        return last, tag
      end
    end
    return nil
  end

  local seen = {}
  local found = {}
  local last, tag = next_tag(1)
  while tag ~= nil do
    if not seen[tag] then
      seen[tag] = true
      found[#found + 1] = tag
    end
    -- resume scanning just past the previous match
    last, tag = next_tag(last + 1)
  end
  return found
end
--- Replace every tag reported by `find_invalid_tags` with a random lowercase
-- placeholder of the same length, wherever that tag directly follows an
-- opening run of backticks at the start of the text or of a line.
-- @tparam string str text to process
-- @treturn string the text with tags replaced by placeholders
-- @treturn table map from placeholder string to the original tag
local function escape_invalid_tags(str)
  local max_attempts = 100
  local tags = find_invalid_tags(str)
  -- we must now replace the tags in a careful order. Specifically,
  -- we can't replace a key that's a substring of a larger key without
  -- first replacing the larger key.
  --
  -- ie. if we replace {python} before {{python}}, Bad Things Happen.
  -- so we sort the tags by descending size, which suffices
  table.sort(tags, function(a, b) return #b < #a end)

  local replacements = {}
  for _, k in ipairs(tags) do
    -- Draw candidates until one does not already occur in the text.
    -- Success is tracked explicitly: comparing a counter against the limit
    -- after the loop misreports a candidate accepted on the final try as a
    -- failure.
    local replacement
    local found_safe = false
    for _ = 1, max_attempts do
      replacement = random_string(#k)
      if str:find(replacement, 1, true) == nil then
        found_safe = true
        break
      end
    end
    if not found_safe then
      -- luacov: disable
      print("Internal error, could not find safe replacement for "..k.." after 100 tries")
      print("Please file a bug at https://github.com/quarto-dev/quarto-cli")
      os.exit(1)
      -- luacov: enable
    end

    -- replace all lua special pattern characters with their
    -- escaped versions so the tag is matched literally
    local safe_pattern = k:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
    replacements[replacement] = k

    -- The fence prefix is captured and re-emitted via %1; the placeholder
    -- consists of lowercase letters only, so it needs no escaping.
    local at_start = "^([ \t\f\v]*```+[ \t\f\v]*)" .. safe_pattern
    local after_newline = "(\n[ \t\f\v]*```+[ \t\f\v]*)" .. safe_pattern
    str = str:gsub(at_start, "%1" .. replacement)
    str = str:gsub(after_newline, "%1" .. replacement)
  end
  return str, replacements
end
--- Put the original tags back in place of their placeholders.
-- @tparam string str text that may contain placeholder strings
-- @tparam table tags map from placeholder string to the original tag
-- @treturn string the text with every placeholder replaced by its tag
local function unescape_invalid_tags(str, tags)
  for replacement, k in pairs(tags) do
    -- In a gsub replacement string only "%" is special, so that is the only
    -- character to escape. Escaping "$" as well would yield "%$", which
    -- Lua 5.2+ rejects with "invalid use of '%' in replacement string" as
    -- soon as the placeholder is actually found in the text.
    local literal = k:gsub("%%", "%%%%")
    str = str:gsub(replacement, literal)
  end
  return str
end
--- Decode a hexadecimal string into the bytes it encodes.
-- Every consecutive pair of characters is read as one base-16 byte value.
-- A trailing unpaired character is left as it is, because the ".." pattern
-- only matches full pairs.
-- @tparam string hex hexadecimal text
-- @treturn string the decoded string (a single return value)
local function hex_to_string(hex)
  local function byte_from_pair(pair)
    return string.char(tonumber(pair, 16))
  end
  -- assigning to one local drops gsub's second result (the match count)
  local decoded = string.gsub(hex, "..", byte_from_pair)
  return decoded
end
--- Read qmd text and return the pandoc document produced from it.
-- The text is pre-processed (fenced-div fix-up, invalid code-fence tags
-- swapped for placeholders, shortcodes encoded), parsed with `pandoc.read`,
-- and the resulting document is walked to undo the placeholders and to
-- decode shortcodes that were encoded in places where they must stay text.
-- @tparam string txt qmd source text
-- @param opts reader options; `opts.extensions` is read when no
--   "user-defined-from" param is set, and `opts` is forwarded to `pandoc.read`
-- @return the walked document
local function readqmd(txt, opts)
  -- an encoded shortcode looks like "<uuid>;<hex digits>;"
  local uuid_pattern = "b58fc729%-690b%-4000%-b19f%-365a4093b2ff;([A-Fa-f0-9]+);"

  -- Pre-processing; each step consumes the output of the previous one.
  local escaped_tags
  txt = md_fenced_div.attempt_to_fix_fenced_div(txt)
  txt, escaped_tags = escape_invalid_tags(txt)
  txt = md_shortcode.parse_md_shortcode_2(txt)

  -- Format flavor, i.e., which extensions should be enabled/disabled.
  local flavor = { format = "markdown", extensions = {} }
  if param("user-defined-from") then
    flavor = _quarto.format.parse_format(param("user-defined-from"))
  else
    for _, ext in pairs(opts.extensions) do
      flavor.extensions[ext] = true
    end
  end

  -- ### Opt-out some extensions that we know we won't support for now ###
  -- https://pandoc.org/MANUAL.html#extension-table_attributes
  -- https://github.com/quarto-dev/quarto-cli/pull/13249#issuecomment-3715267414
  -- Only disable if the extension is actually supported by the format
  local supported = pandoc.format.all_extensions(flavor.format)
  if supported:includes('table_attributes') then
    flavor.extensions["table_attributes"] = false
  end

  -- Replace every encoded shortcode in `s` by the text it encodes.
  local function decode_shortcodes(s)
    return (s:gsub(uuid_pattern, hex_to_string))
  end

  -- Map a placeholder class back to the tag it stands for.
  local function restore_invalid_tags(tag)
    return escaped_tags[tag] or tag
  end

  -- parse_shortcode overparses shortcodes inside code blocks, link targets,
  -- etc., so that damage is undone here.
  local function unshortcode_text(el)
    el.text = decode_shortcodes(el.text)
    return el
  end

  local function filter_attrs(el)
    for key, value in pairs(el.attributes) do
      if type(value) == "string" then
        local decoded = decode_shortcodes(value)
        -- Only assign when something changed. This slightly works around
        -- what appears to be a foundational problem with Pandoc's Lua API
        -- while accessing attributes with repeated keys.
        -- Quarto is still going to be broken for the case where there are
        -- shortcodes inside values of attributes with repeated keys:
        --
        -- []{k='{{< meta k1 >}}' k='{{< meta k2 >}}'}
        --
        -- But I don't know how to work around this.
        if decoded ~= value then
          el.attributes[key] = decoded
        end
      end
    end
    return el
  end

  -- Split a Str holding encoded shortcodes into the surrounding text and the
  -- inlines obtained by parsing each shortcode's markdown form.
  local function expand_str(str_node)
    local text = str_node.text
    -- Quick check: if the UUID is not present at all, leave the node alone
    if not text:find("b58fc729-690b-4000-b19f-365a4093b2ff", 1, true) then
      return nil
    end

    local result = pandoc.Inlines{}
    local pos = 1
    local match_start, match_end, hex_content = text:find(uuid_pattern, pos)
    while match_start do
      -- text before the match becomes its own Str node (if non-empty)
      if match_start > pos then
        table.insert(result, pandoc.Str(text:sub(pos, match_start - 1)))
      end
      -- hex -> original shortcode -> markdown span syntax -> pandoc inlines
      local shortcode_text = hex_to_string(hex_content)
      local parsed_md = md_shortcode.parse_md_shortcode(shortcode_text) or ""
      local parsed = pandoc.read(parsed_md, "markdown")
      local inlines = parsed.blocks[1] and parsed.blocks[1].content or pandoc.Inlines{}
      for _, inline in ipairs(inlines) do
        table.insert(result, inline)
      end
      -- continue right after the match
      pos = match_end + 1
      match_start, match_end, hex_content = text:find(uuid_pattern, pos)
    end
    -- whatever follows the last match
    if pos <= #text then
      table.insert(result, pandoc.Str(text:sub(pos)))
    end
    return result
  end

  local cleanup = {
    CodeBlock = function (cb)
      cb.classes = cb.classes:map(restore_invalid_tags)
      cb.text = unescape_invalid_tags(decode_shortcodes(cb.text), escaped_tags)
      return cb
    end,
    Code = unshortcode_text,
    RawInline = unshortcode_text,
    RawBlock = unshortcode_text,
    Math = unshortcode_text,
    Header = filter_attrs,
    Span = filter_attrs,
    Div = filter_attrs,
    Link = function (link)
      link = filter_attrs(link)
      link.target = decode_shortcodes(link.target)
      return link
    end,
    Image = function (img)
      img = filter_attrs(img)
      -- Replace UUID-encoded shortcodes in the image source
      img.src = decode_shortcodes(img.src)
      return img
    end,
    Str = expand_str
  }

  local doc = pandoc.read(txt or "", flavor, opts):walk(cleanup)
  return doc
end
-- Names of the reader options that options_to_meta and meta_to_options
-- copy, one field per name.
local reader_option_keys = {
  "abbreviations",
  "columns",
  "default_image_extension",
  "extensions",
  "indented_code_classes",
  "standalone",
  "strip_comments",
  "tab_stops",
  "track_changes",
}
--- Copy the fields named in `reader_option_keys` from `opts` into a new
-- plain table.
-- @param opts value indexable by option name (presumably a pandoc
--   ReaderOptions object -- confirm against callers)
-- @treturn table new table holding the copied fields
local function options_to_meta(opts)
  local meta = {}
  for i = 1, #reader_option_keys do
    local name = reader_option_keys[i]
    meta[name] = opts[name]
  end
  return meta
end
--- Build a `pandoc.ReaderOptions` value from the fields of `meta` named in
-- `reader_option_keys`; all other fields of `meta` are ignored.
-- @param meta value indexable by option name
-- @return the result of `pandoc.ReaderOptions` applied to the picked fields
local function meta_to_options(meta)
  local picked = {}
  for i = 1, #reader_option_keys do
    local name = reader_option_keys[i]
    picked[name] = meta[name]
  end
  return pandoc.ReaderOptions(picked)
end
-- Public interface of this module.
return {
  readqmd = readqmd,
  options_to_meta = options_to_meta,
  meta_to_options = meta_to_options,
}