OK, that previous post had a couple of minor problems.
First, that regular expression was terribly unwieldy. I haven’t managed to shorten it fully yet, but this is marginally better:
"([\w\s:;… add whatever characters are valid here…]*)(?:\",|\"$)
Secondly, I hadn’t fully thought through the matches/groups/captures hierarchy, so I kept seeing empty captures reported back without understanding where they came from.
Updated code here:
open System
open System.Text.RegularExpressions
/// Partial active pattern: runs the pattern `regex` against `str`.
/// Succeeds with all matches as a `seq<Match>` when there is at least one
/// match; fails (yields `None`) when there are none.
let (|ActiveRegex|_|) regex str =
    let found = Regex(regex).Matches(str)
    match found.Count with
    | 0 -> None
    | _ -> Some (Seq.cast<Match> found)
/// Returns every match of the pattern `re` found in `s`,
/// or an empty sequence when the pattern does not match at all.
let matches s re =
    // Invoke the active pattern as an ordinary function and unwrap its option.
    match (|ActiveRegex|_|) re s with
    | Some found -> found
    | None -> Seq.empty
/// Lazily yields, in order, the value of every group of every match of the
/// pattern `p` in `s`, skipping the first group of each match (group 0,
/// which in .NET holds the text of the whole match).
let capturesSeq s p =
    seq {
        for m in matches s p do
            for g in Seq.skip 1 (Seq.cast<Group> m.Groups) do
                yield g.Value
    }
// One quoted CSV field: an opening quote, a captured run of permitted
// characters, then a closing quote followed by a comma or the end of input.
let csvRegex = "\"([\w\s:;~!@#$%\^&\*_<>,\.\\\/\|\[\]\{\}\(\)\-\+\?]*)(?:\",|\"$)"

// Sample line with punctuation-heavy, empty, date-like and comma-containing fields.
let testLine = "\"31\",\"a 1\",\"b-2\",\"c+3\",\",.;~!@#$%^&*()\/?><,.|{}[]_+-\",\"\",\"14/05/2010 12:12:20 a.m.\",\"1: 2; 3. 4? 5[ 6] 7& 8*\",\"a,b\""

// Print each captured field value on its own line.
for field in capturesSeq testLine csvRegex do
    printfn "%A" field
No comments:
Post a Comment