Regex match ONLY new lines in quotes in a larger string

Hayden Perry Source

I have a large CSV file and I want to find a regex capable of removing all new lines within quotes.

a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,"c
123",d123,
a123,b123,c123,d123,
"a
123",b123,c123,d123,
a123,b123,c123,"d
1
2
3",
a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,c123,d123

In this example, I have multiple rows where there is a new line in the quotes and even one where there are multiple new lines. I need to remove all of these new lines provided that they are within quotes. Is this possible?

Expected result:

a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,"c123",d123,
a123,b123,c123,d123,
"a123",b123,c123,d123,
a123,b123,c123,"d123",
a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,c123,d123

Node.js code:

var request = require('request')

// The URL is elided ('...') in the question as posted.
const options = { url: '...' }

request.post(options, (error, response, body) => {
    // `/.../g` stands in for the regex this question is asking for;
    // every match in the response body is replaced with '<#>'.
    const formatted = body.replace(/.../g, '<#>')
})
Tags: javascript, regex, csv

Answers

answered 6 months ago lmcarreiro #1

You can use a regex to match the content between quotes, then go through these matches and replace each one with a version that has its line breaks removed:

/**
 * Remove every CR/LF character that appears inside a double-quoted section
 * of `csv`, leaving line breaks outside quotes (the real row separators)
 * untouched.
 *
 * @param {string} csv - Raw CSV text.
 * @returns {string} The text with line breaks stripped from quoted sections.
 */
function stripQuotedLineBreaks(csv) {
  // `[^"]*` (not `+`) so an empty field `""` is consumed as its own match;
  // otherwise its closing quote would pair with the next opening quote and
  // the row breaks between them would be removed.
  // A callback replacement is used so `$&`, `$1`, ... inside a field are
  // kept literally, and so input without any quotes is simply returned as-is.
  return csv.replace(/"[^"]*"/g, (quoted) => quoted.replace(/[\r\n]+/g, ""));
}

let str = `a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,"c
123",d123,
a123,b123,c123,d123,
"a
123",b123,c123,d123,
a123,b123,c123,"d
1
2
3",
a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,c123,d123`;

str = stripQuotedLineBreaks(str);

console.log(str);

Output in the Chrome console:

a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,"c123",d123,
a123,b123,c123,d123,
"a123",b123,c123,d123,
a123,b123,c123,"d123",
a123,b123,c123,d123,
a123,b123,c123,d123,
a123,b123,c123,d123

Your Node.js code would be like this:

var request = require('request')

request.post({
    url: '...',
}, function (error, response, body) {
    // On a failed request there is no body to process; report and stop.
    if (error) {
        console.error(error);
        return;
    }

    // Strip CR/LF from every double-quoted section in a single pass.
    // - `[^"]*` also consumes empty fields (`""`), so their closing quote is
    //   never mistaken for the start of the next quoted section.
    // - The callback form keeps `$&`, `$1`, ... inside a field literal and
    //   works when the body contains no quotes at all (nothing is replaced).
    const formatted = body.replace(/"[^"]*"/g, (quoted) => quoted.replace(/[\r\n]+/g, ""));
    console.log(formatted);
})

comments powered by Disqus