Skip to content

Commit 1217d71

Browse files
committed
Little optimisations to record parsing
1 parent 6cb12b9 commit 1217d71

1 file changed

Lines changed: 5 additions & 4 deletions

File tree

src/record.cc

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,10 +65,11 @@ namespace warc2text {
6565
if (header.count("warc-target-uri") == 1) {
6666
// respect the original casing
6767
url = header["warc-target-uri"];
68-
}
6968

70-
if (!url.empty() && url[0] == '<' && url[url.size()-1] == '>')
71-
url = url.substr(1, url.size()-2);
69+
// Remove any "<" and ">" wrappings from the URL
70+
if (!url.empty() && url[0] == '<' && url[url.size()-1] == '>')
71+
url = url.substr(1, url.size()-2);
72+
}
7273

7374
if (header.count("content-type") == 1) {
7475
WARCcontentType = header["content-type"];
@@ -80,7 +81,7 @@ namespace warc2text {
8081
}
8182

8283
payload_start = last_pos;
83-
if (header["warc-type"] == "response") {
84+
if (recordType == "response") {
8485
// parse HTTP header
8586
pos = content.find("HTTP/1.", last_pos);
8687
if (pos == last_pos) { // found HTTP header

0 commit comments

Comments
 (0)