Wait the light to fall

解析结构化文本

焉知非鱼

数据样例 #

[28/04/2015 12:32] Title1

content line 1
content line 2
content line 3
content line 4
content line 5

[28/04/2015 12:16] Title2

content line 6
content line 7

[27/04/2015 17:30] Title3

content line 8
content line 9
content line 10

Grammar #

# Grammar for a plain-text log made of dated entries.
#
# Each entry is a heading of the form "[dd/mm/yyyy hh:mm] Title" followed by
# free-form body text that runs until the next heading or the end of input.
grammar StructedText {
    # The whole input must consist of one or more entries and nothing else.
    token TOP { ^ <entry>+ $ }

    token entry {
        <head> \s*   # every entry starts with a heading
        <line>+ \s*  # followed by the body text under that heading
    }

    token head     { '[' <datetime> ']' \s+ <title> }
    token datetime {  <filedate> \s+  <filetime> }
    token filedate { [\d+]+ % '/' }   # digit groups separated by '/'
    token filetime { [\d+]+ % ':' }   # digit groups separated by ':'
    token title    { \N+          }   # the rest of the heading line

    # Body text: every character (newlines included) up to, but not including,
    # the next line that opens with a heading. The lookahead is anchored with
    # ^^ so that a bracketed timestamp quoted in the middle of a content line
    # is kept as content instead of being mistaken for the next heading.
    # \h* still lets an indented heading end the body.
    token line {
        [
            <!before ^^ \h* <head> >   # stop where a line opens with a heading
            .                          # the dot also matches newlines
        ]+
    }
}

Action #

# Action object for the StructedText grammar. Each method attaches a result to
# the match of the rule with the same name.
class StructedText::Actions {
    # Body text is kept verbatim as a string.
    method line ($/) {
        make ~$/;
    }

    # The date with every ':' or '/' replaced by '-'.
    method filedate ($/) {
        make ~$/.subst(rx/<[:/]>/, '-', :g);
    }

    # The complete heading text with every ':' or '/' replaced by '-'.
    method head ($/) {
        make ~$/.subst(rx/<[:/]>/, '-', :g);
    }

    # An entry becomes a Pair: rewritten heading => list of body strings.
    method entry ($/) {
        make $<head>.made => $<line>».made;
    }

    # The overall result is the list of entry Pairs.
    method TOP ($/) {
        make $<entry>».made;
    }
}

解析 #

# Parse sample.txt and write every entry to its own text file, named after the
# timestamp found between the square brackets of the entry heading.
my $actions = StructedText::Actions.new;
my $parsed = StructedText.parsefile('sample.txt', :$actions).made;
if $parsed {
    for @$parsed -> $e {
        # $e is a Pair: rewritten heading => list of body strings.
        # The capture markers <( )> keep only the text inside the brackets.
        my $filename = ~$e.key.match(/'[' <( <-[\[\]]>+ )> ']'/)  ~ ".txt";
        my $fh = open $filename, :w;
        {
            # Close the handle when this block is left, even if a write throws.
            LEAVE { .close with $fh }
            $fh.say: ~$e.key;
            for $e.value -> $v {
                $fh.say: $v;
            }
        }
        say "生成文件 $filename ";
    }
}
else {
    # Report a failed parse on stderr instead of silently doing nothing.
    note "Could not parse sample.txt; no files were written.";
}