Skip to content

Commit 112932e

Browse files
committed
Add a fallback that reads the charset from a `<meta http-equiv="Content-Type">` tag's `content` attribute
1 parent 3e4f6f6 commit 112932e

3 files changed

Lines changed: 15 additions & 0 deletions

File tree

dist/lib/fallback.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ function fallback(ogObject, options, $, body) {
198198
else if (doesElementExist('head > meta[name="charset"]', 'content', $)) {
199199
ogObject.charset = $('head > meta[name="charset"]').attr('content');
200200
}
201+
else if (doesElementExist('head > meta[http-equiv="content-type"]', 'content', $)) {
202+
const content = $('head > meta[http-equiv="content-type"]').attr('content');
203+
const charsetRegEx = /charset=([^()<>@,;:"/[\]?.=\s]*)/i;
204+
ogObject.charset = charsetRegEx.test(content) ? charsetRegEx.exec(content)[1] : 'UTF-8';
205+
}
201206
else if (body) {
202207
ogObject.charset = chardet_1.default.detect(Buffer.from(body)) || '';
203208
}

lib/fallback.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ export function fallback(ogObject: OgObjectInteral, options: OpenGraphScraperOpt
175175
ogObject.charset = $('meta').attr('charset');
176176
} else if (doesElementExist('head > meta[name="charset"]', 'content', $)) {
177177
ogObject.charset = $('head > meta[name="charset"]').attr('content');
178+
} else if (doesElementExist('head > meta[http-equiv="content-type"]', 'content', $)) {
179+
const content = $('head > meta[http-equiv="content-type"]').attr('content');
180+
const charsetRegEx = /charset=([^()<>@,;:"/[\]?.=\s]*)/i;
181+
ogObject.charset = charsetRegEx.test(content) ? charsetRegEx.exec(content)[1] : 'UTF-8';
178182
} else if (body) {
179183
ogObject.charset = chardet.detect(Buffer.from(body)) || '';
180184
}

tests/unit/fallback.spec.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,12 @@ describe('fallback', function () {
425425
expect(ogObject.charset).to.be.eql('bar');
426426
expect(ogObject).to.have.all.keys('charset');
427427
});
428+
it('when there is a meta tag with http-equiv charset', function () {
429+
const $ = load('<html><head><meta http-equiv="Content-Type" content="text/html; charset=foo_bar"></head></html>');
430+
const ogObject = fallback({}, {}, $, '');
431+
expect(ogObject.charset).to.be.eql('foo_bar');
432+
expect(ogObject).to.have.all.keys('charset');
433+
});
428434
it('when trying to get a charset from the body', function () {
429435
const body = '<html><head></head></html>';
430436
const $ = load(body);

0 commit comments

Comments
 (0)