re.k 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. / Inspired by [[http://nsl.com/papers/re.htm]] by Raul Miller
  2. / Expanded to J at [[https://code.jsoftware.com/wiki/Essays/RegularExpressions/NondeterministicFiniteAutomata]]
  3. pr:{r:@[;;|;]\[(!0)@&1+|/x;x;!#x];(!#x)^'r@'x-1}
  4. exp:{@[@/r;&=':*|r:1(&1+x<"("=)\y/,'"()";:;"|"]}
  5. msk:{-1_0,<\"\\"=x}
  6. dp:{d:(-x<"|"=y)+2*(-*t)++\-/t:x</:"()"=\:y
  7. $[1<|/dd:(-*t)++\-/t:x</:"[]"=\:y;`err@"bad character class"
  8. |/0,~^"()"?y@&x<dd;`err@"bad character class"
  9. |/dd<0;`err@"bad character class";]
  10. d+dd}
  11. cls:{$[3>#x;x
  12. |/0>r:-/i:x@-1 0++1_,':&~|':<\"-"=x," ";`err@"bad class"
  13. ,/`c$i[1]+!'1+r]}
  14. prs:{ t:dp[m:msk e] e:exp[0,msk[x],0;x]
  15. (e;t;m):(e;t;m)@\:&~|/m</:"])"=\:e
  16. (e;t;m):(e;t;m)@\:&~<\"\\"~/:e
  17. t:pr t
  18. e:@[e;?(t,&|/"*+?.|^$"=\:e)^&m;`i$]
  19. t:@[t;;:;].,'/{|y@1(-1+)\&|/x[y]~\:/:"?*+"+0}[e]'=t
  20. (e;t)}
  21. cmp0:{[e;t] d:(~e[t]~\:"["+0)&|/e~\:/:"^$"+0
  22. (e;t):(-+/d)_/:(::;(<g)@)@'(e;t)@\:g:<d
  23. s:{$[`c~_@n:x@z;step@n
  24. n="?"+0;maybe@o[x;y]@*y[z]
  25. n="*"+0;rep@o[x;y]@*y[z]
  26. n="+"+0;rep1@o[x;y]@*y[z]
  27. n~"("+0;or@o[x;y]'y[z]
  28. n~"|"+0;seq@o[x;y]'y[z]
  29. n~"["+0;$["^"=*c:x@y[z];nstep@,cls@1_c;step@,cls@c]
  30. n~"."+0;nstep@,!0
  31. x@z]}[e;(=0N,1_t),(,0N)!,!0]@0
  32. fix s}
  33. cmp:{cmp0.(prs x)}
  34. anc:{[n;t]s:&n~\:"|"+0;e:s^'*'|'&'s=\:t
  35. +(((s;s);(e;s)))@\:'&'~|/'n[(s+1;e)]~\:/:'("(^";"$$")+0}
  36. scmp:{(n;t):prs x
  37. (n;t):(::;(<g)@)@'(n;t)@\:g:{y,,/o[x]'x y}[(=0N,1_t),(,0N)!,!0]0
  38. a:anc[n;t]
  39. g:@[m;0 1+/:,/(::;(#m)-2+)@'(1|:\(m:&1+@[&#t;a[0];+;2]))?'a[0];:;0N 2#(#t)+!2*#,/a[0]]
  40. n,:(2*#,/a[0])#0+"*."
  41. t,:,/+(,/a[1];(#t)+2*!#,/a[0])
  42. cmp0.(::;(<g)@)@'(n;t)@\:g}
  43. non:256
  44. nul:(non+1)#,!0
  45. final: {(#x)-1}
  46. branch:{,@[nul;non;:;,/x]}
  47. step:{branch[,1],(, @[nul;,/x;:;2]),,nul}
  48. nstep:{branch[,1],(, @[nul;(!non)^,/x;:;2]),,nul}
  49. maybe:{branch[1,#x],1+x}
  50. seq:{.[,/(+\0,-1+#'x)+(-1_'x),,,nul;0,non;,;(~#x)#1]}
  51. rep1:{x[,final x;non],:,0,#x; x,,nul}
  52. rep:maybe@rep1@
  53. or:{branch[m],(,/+/(*|n;x)*1~:\(m+c-1)=x+:m:-1_n:+\1,c:#'x),,nul}
  54. re:seq@step'
  55. comb: {?y,,//x@y}
  56. tc: {comb[x;]'/x}
  57. fix: {comb[tc x[;non];]''x}
  58. next: {?,/x[0][y;x[1]z]}
  59. match:{n:next[(x;"",y)]
  60. final[x]=|/*(&/#:'){(a;b):y;(x[a;*b];1_b)}[n]/(x[0;non];!#y)}
  61. nfa2dfa:{c:1((@/1<:\?:)','/x@)\;r:{y,,/(x'(~#:')_^[;*+y]@?*|:)'y}[c]/,c x[0;re.non]
  62. (&|/'~^(,re.final[x])?*r;0^?/r:+r)}
  63. nfacmp:nfa2dfa@cmp@
  64. nfascmp:nfa2dfa@scmp@
  65. nfamatch:{~^x[0]?0{x[y;z]}[x[1]]/y}