summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- cook/parse-internals.sld 3
-rw-r--r-- cook/parse.scm 8
-rw-r--r-- cook/parse.sld 3
-rw-r--r-- cook/unicode.sld 201
-rw-r--r-- tests/parse.scm 4
5 files changed, 208 insertions, 11 deletions
diff --git a/cook/parse-internals.sld b/cook/parse-internals.sld
index 1728f1d..d73a3c3 100644
--- a/cook/parse-internals.sld
+++ b/cook/parse-internals.sld
@@ -8,7 +8,8 @@
(chibi char-set)
(chibi parse)
(chibi regexp)
- (chibi string))
+ (chibi string)
+ (cook unicode))
(export amount?
amount-quantity
amount-unit
diff --git a/cook/parse.scm b/cook/parse.scm
index 0fc4744..fd6c70c 100644
--- a/cook/parse.scm
+++ b/cook/parse.scm
@@ -85,12 +85,6 @@ https://github.com/cooklang/spec/blob/main/EBNF.md
lis))
-(define whitespace-chars (char-set-union
- (char-set #\space #\x00A0 #\x1680)
- (char-set #\x202F #\x205F #\x3000)
- (ucs-range->char-set #x2000 #x200B)))
-(define newline-chars (char-set #\x000A #\x000D #\x0085 #\x2028 #\x2029))
-(define punctuation-chars (char-set #\. #\{ #\})) ;; TODO: do it right
(define word-chars (char-set-difference char-set:full
punctuation-chars
newline-chars
@@ -101,7 +95,7 @@ https://github.com/cooklang/spec/blob/main/EBNF.md
(char-set #\@ #\# #\~)))
(define unit-chars (char-set-difference text-chars (char-set #\})))
(define component-chars (char-set-difference text-chars
- (char-set #\{ #\})))
+ punctuation-chars)) ; was only { and }; NOTE(review): this now also drops '-', ',' and '.' from component text -- confirm that is intended
(define component-word-chars (char-set-difference component-chars
newline-chars
whitespace-chars))
diff --git a/cook/parse.sld b/cook/parse.sld
index 1dcfb35..1af9734 100644
--- a/cook/parse.sld
+++ b/cook/parse.sld
@@ -8,7 +8,8 @@
(chibi char-set)
(chibi parse)
(chibi regexp)
- (chibi string))
+ (chibi string)
+ (cook unicode))
(export amount?
amount-quantity
amount-unit
diff --git a/cook/unicode.sld b/cook/unicode.sld
new file mode 100644
index 0000000..9d3cbcd
--- /dev/null
+++ b/cook/unicode.sld
@@ -0,0 +1,201 @@
+(define-library (cook unicode)
+ (import (chibi)
+ (srfi 14))
+ (export whitespace-chars
+ newline-chars
+ punctuation-chars)
+ (begin
+ (define whitespace-chars (char-set-union ; cooklang "white space": Unicode category Zs plus CHARACTER TABULATION
+ (char-set #\tab #\space #\x00A0 #\x1680) ; U+0009 TAB (named by the cooklang EBNF), SPACE, NO-BREAK SPACE, OGHAM SPACE MARK
+ (char-set #\x202F #\x205F #\x3000) ; NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE, IDEOGRAPHIC SPACE
+ (ucs-range->char-set #x2000 #x200B))) ; U+2000..U+200A; the SRFI 14 upper bound is exclusive, so U+200B stays out
+ (define newline-chars ; cooklang "new line character": U+000A..U+000D, U+0085, U+2028 and U+2029
+ (char-set #\x000A #\x000B #\x000C #\x000D #\x0085 #\x2028 #\x2029)) ; LF, VT, FF, CR, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR
+ (define punctuation-chars ; union of the per-category tables below (Unicode general categories Pc, Pd, Pe, Pf, Pi, Po, Ps)
+ (char-set-union
+ ;; Pc: connector punctuation (e.g. U+005F LOW LINE)
+ (char-set #\x005F #\x203F #\x2040 #\x2054 #\xFE33 #\xFE34 #\xFE4D
+ #\xFE4E #\xFE4F #\xFF3F)
+ ;; Pd: dash punctuation (e.g. U+002D HYPHEN-MINUS)
+ (char-set #\x002D #\x058A #\x05BE #\x1400 #\x1806 #\x2010 #\x2011
+ #\x2012 #\x2013 #\x2014 #\x2015 #\x2E17 #\x2E1A #\x2E3A
+ #\x2E3B #\x2E40 #\x301C #\x3030 #\x30A0 #\xFE31 #\xFE32
+ #\xFE58 #\xFE63 #\xFF0D #\x10EAD)
+
+ ;; Pe: close punctuation (e.g. U+0029 RIGHT PARENTHESIS, U+007D RIGHT CURLY BRACKET)
+ (char-set #\x0029 #\x005D #\x007D #\x0F3B #\x0F3D #\x169C #\x2046
+ #\x207E #\x208E #\x2309 #\x230B #\x232A #\x2769 #\x276B
+ #\x276D #\x276F #\x2771 #\x2773 #\x2775 #\x27C6 #\x27E7
+ #\x27E9 #\x27EB #\x27ED #\x27EF #\x2984 #\x2986 #\x2988
+ #\x298A #\x298C #\x298E #\x2990 #\x2992 #\x2994 #\x2996
+ #\x2998 #\x29D9 #\x29DB #\x29FD #\x2E23 #\x2E25 #\x2E27
+ #\x2E29 #\x3009 #\x300B #\x300D #\x300F #\x3011 #\x3015
+ #\x3017 #\x3019 #\x301B #\x301E #\x301F #\xFD3E #\xFE18
+ #\xFE36 #\xFE38 #\xFE3A #\xFE3C #\xFE3E #\xFE40 #\xFE42
+ #\xFE44 #\xFE48 #\xFE5A #\xFE5C #\xFE5E #\xFF09 #\xFF3D
+ #\xFF5D #\xFF60 #\xFF63)
+
+ ;; Pf: final quote punctuation (e.g. U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
+ (char-set #\x00BB #\x2019 #\x201D #\x203A #\x2E03 #\x2E05 #\x2E0A
+ #\x2E0D #\x2E1D #\x2E21)
+
+ ;; Pi: initial quote punctuation (e.g. U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
+ (char-set #\x00AB #\x2018 #\x201B #\x201C #\x201F #\x2039 #\x2E02
+ #\x2E04 #\x2E09 #\x2E0C #\x2E1C #\x2E20)
+
+ ;; Po: other punctuation. NOTE(review): four runs below are listed twice (they start at #\x2D70, #\xA9DF, #\x11049 and #\x11A9E); harmless in a set, but the table could be deduplicated.
+ (char-set #\x0021 #\x0022 #\x0023 #\x0025 #\x0026 #\x0027 #\x002A
+ #\x002C #\x002E #\x002F #\x003A #\x003B #\x003F #\x0040
+ #\x005C #\x00A1 #\x00A7 #\x00B6 #\x00B7 #\x00BF #\x037E
+ #\x0387 #\x055A #\x055B #\x055C #\x055D #\x055E #\x055F
+ #\x0589 #\x05C0 #\x05C3 #\x05C6 #\x05F3 #\x05F4 #\x0609
+ #\x060A #\x060C #\x060D #\x061B #\x061E #\x061F #\x066A
+ #\x066B #\x066C #\x066D #\x06D4 #\x0700 #\x0701 #\x0702
+ #\x0703 #\x0704 #\x0705 #\x0706 #\x0707 #\x0708 #\x0709
+ #\x070A #\x070B #\x070C #\x070D #\x07F7 #\x07F8 #\x07F9
+ #\x0830 #\x0831 #\x0832 #\x0833 #\x0834 #\x0835 #\x0836
+ #\x0837 #\x0838 #\x0839 #\x083A #\x083B #\x083C #\x083D
+ #\x083E #\x085E #\x0964 #\x0965 #\x0970 #\x09FD #\x0A76
+ #\x0AF0 #\x0C77 #\x0C84 #\x0DF4 #\x0E4F #\x0E5A #\x0E5B
+ #\x0F04 #\x0F05 #\x0F06 #\x0F07 #\x0F08 #\x0F09 #\x0F0A
+ #\x0F0B #\x0F0C #\x0F0D #\x0F0E #\x0F0F #\x0F10 #\x0F11
+ #\x0F12 #\x0F14 #\x0F85 #\x0FD0 #\x0FD1 #\x0FD2 #\x0FD3
+ #\x0FD4 #\x0FD9 #\x0FDA #\x104A #\x104B #\x104C #\x104D
+ #\x104E #\x104F #\x10FB #\x1360 #\x1361 #\x1362 #\x1363
+ #\x1364 #\x1365 #\x1366 #\x1367 #\x1368 #\x166E #\x16EB
+ #\x16EC #\x16ED #\x1735 #\x1736 #\x17D4 #\x17D5 #\x17D6
+ #\x17D8 #\x17D9 #\x17DA #\x1800 #\x1801 #\x1802 #\x1803
+ #\x1804 #\x1805 #\x1807 #\x1808 #\x1809 #\x180A #\x1944
+ #\x1945 #\x1A1E #\x1A1F #\x1AA0 #\x1AA1 #\x1AA2 #\x1AA3
+ #\x1AA4 #\x1AA5 #\x1AA6 #\x1AA8 #\x1AA9 #\x1AAA #\x1AAB
+ #\x1AAC #\x1AAD #\x1B5A #\x1B5B #\x1B5C #\x1B5D #\x1B5E
+ #\x1B5F #\x1B60 #\x1BFC #\x1BFD #\x1BFE #\x1BFF #\x1C3B
+ #\x1C3C #\x1C3D #\x1C3E #\x1C3F #\x1C7E #\x1C7F #\x1CC0
+ #\x1CC1 #\x1CC2 #\x1CC3 #\x1CC4 #\x1CC5 #\x1CC6 #\x1CC7
+ #\x1CD3 #\x2016 #\x2017 #\x2020 #\x2021 #\x2022 #\x2023
+ #\x2024 #\x2025 #\x2026 #\x2027 #\x2030 #\x2031 #\x2032
+ #\x2033 #\x2034 #\x2035 #\x2036 #\x2037 #\x2038 #\x203B
+ #\x203C #\x203D #\x203E #\x2041 #\x2042 #\x2043 #\x2047
+ #\x2048 #\x2049 #\x204A #\x204B #\x204C #\x204D #\x204E
+ #\x204F #\x2050 #\x2051 #\x2053 #\x2055 #\x2056 #\x2057
+ #\x2058 #\x2059 #\x205A #\x205B #\x205C #\x205D #\x205E
+ #\x2CF9 #\x2CFA #\x2CFB #\x2CFC #\x2CFE #\x2CFF #\x2D70
+ #\x2E00 #\x2E01 #\x2E06 #\x2E07 #\x2E08 #\x2E0B #\x2E0E
+ #\x2E0F #\x2E10 #\x2E11 #\x2E12 #\x2E13 #\x2E14 #\x2E15
+ #\x2E16 #\x2E18 #\x2E19 #\x2E1B #\x2E1E #\x2E1F #\x2E2A
+ #\x2E2B #\x2E2C #\x2E2D #\x2E2E #\x2E30 #\x2E31 #\x2E32
+ #\x2E33 #\x2E34 #\x2E35 #\x2E36 #\x2E37 #\x2E38 #\x2E39
+ #\x2E3C #\x2E3D #\x2E3E #\x2E3F #\x2E41 #\x2E43 #\x2E44
+ #\x2E45 #\x2E46 #\x2E47 #\x2E48 #\x2E49 #\x2E4A #\x2E4B
+ #\x2E4C #\x2E4D #\x2E4E #\x2E4F #\x2E52 #\x3001 #\x3002
+ #\x3003 #\x303D #\x30FB #\xA4FE #\xA4FF #\xA60D #\xA60E
+ #\xA60F #\xA673 #\xA67E #\xA6F2 #\xA6F3 #\xA6F4 #\xA6F5
+ #\xA6F6 #\xA6F7 #\xA874 #\xA875 #\xA876 #\xA877 #\xA8CE
+ #\xA8CF #\xA8F8 #\xA8F9 #\xA8FA #\xA8FC #\xA92E #\xA92F
+ #\xA95F #\xA9C1 #\xA9C2 #\xA9C3 #\xA9C4 #\xA9C5 #\xA9C6
+ #\xA9C7 #\xA9C8 #\xA9C9 #\xA9CA #\xA9CB #\xA9CC #\xA9CD
+ #\xA9DE #\x2D70 #\x2E00 #\x2E01 #\x2E06 #\x2E07 #\x2E08
+ #\x2E0B #\x2E0E #\x2E0F #\x2E10 #\x2E11 #\x2E12 #\x2E13
+ #\x2E14 #\x2E15 #\x2E16 #\x2E18 #\x2E19 #\x2E1B #\x2E1E
+ #\x2E1F #\x2E2A #\x2E2B #\x2E2C #\x2E2D #\x2E2E #\x2E30
+ #\x2E31 #\x2E32 #\x2E33 #\x2E34 #\x2E35 #\x2E36 #\x2E37
+ #\x2E38 #\x2E39 #\x2E3C #\x2E3D #\x2E3E #\x2E3F #\x2E41
+ #\x2E43 #\x2E44 #\x2E45 #\x2E46 #\x2E47 #\x2E48 #\x2E49
+ #\x2E4A #\x2E4B #\x2E4C #\x2E4D #\x2E4E #\x2E4F #\x2E52
+ #\x3001 #\x3002 #\x3003 #\x303D #\x30FB #\xA4FE #\xA4FF
+ #\xA60D #\xA60E #\xA60F #\xA673 #\xA67E #\xA6F2 #\xA6F3
+ #\xA6F4 #\xA6F5 #\xA6F6 #\xA6F7 #\xA874 #\xA875 #\xA876
+ #\xA877 #\xA8CE #\xA8CF #\xA8F8 #\xA8F9 #\xA8FA #\xA8FC
+ #\xA92E #\xA92F #\xA95F #\xA9C1 #\xA9C2 #\xA9C3 #\xA9C4
+ #\xA9C5 #\xA9C6 #\xA9C7 #\xA9C8 #\xA9C9 #\xA9CA #\xA9CB
+ #\xA9CC #\xA9CD #\xA9DE #\xA9DF #\xAA5C #\xAA5D #\xAA5E
+ #\xAA5F #\xAADE #\xAADF #\xAAF0 #\xAAF1 #\xABEB #\xFE10
+ #\xFE11 #\xFE12 #\xFE13 #\xFE14 #\xFE15 #\xFE16 #\xFE19
+ #\xFE30 #\xFE45 #\xFE46 #\xFE49 #\xFE4A #\xFE4B #\xFE4C
+ #\xFE50 #\xFE51 #\xFE52 #\xFE54 #\xFE55 #\xFE56 #\xFE57
+ #\xFE5F #\xFE60 #\xFE61 #\xFE68 #\xFE6A #\xFE6B #\xFF01
+ #\xFF02 #\xFF03 #\xFF05 #\xFF06 #\xFF07 #\xFF0A #\xFF0C
+ #\xFF0E #\xFF0F #\xFF1A #\xFF1B #\xFF1F #\xFF20 #\xFF3C
+ #\xFF61 #\xFF64 #\xFF65 #\x10100 #\x10101 #\x10102 #\x1039F
+ #\x103D0 #\x1056F #\x10857 #\x1091F #\x1093F #\x10A50
+ #\x10A51 #\x10A52 #\x10A53 #\x10A54 #\x10A55 #\x10A56
+ #\x10A57 #\x10A58 #\x10A7F #\x10AF0 #\x10AF1 #\x10AF2
+ #\x10AF3 #\x10AF4 #\x10AF5 #\x10AF6 #\x10B39 #\x10B3A
+ #\x10B3B #\x10B3C #\x10B3D #\x10B3E #\x10B3F #\x10B99
+ #\x10B9A #\x10B9B #\x10B9C #\x10F55 #\x10F56 #\x10F57
+ #\x10F58 #\x10F59 #\x11047 #\x11048 #\xA9DF #\xAA5C #\xAA5D
+ #\xAA5E #\xAA5F #\xAADE #\xAADF #\xAAF0 #\xAAF1 #\xABEB
+ #\xFE10 #\xFE11 #\xFE12 #\xFE13 #\xFE14 #\xFE15 #\xFE16
+ #\xFE19 #\xFE30 #\xFE45 #\xFE46 #\xFE49 #\xFE4A #\xFE4B
+ #\xFE4C #\xFE50 #\xFE51 #\xFE52 #\xFE54 #\xFE55 #\xFE56
+ #\xFE57 #\xFE5F #\xFE60 #\xFE61 #\xFE68 #\xFE6A #\xFE6B
+ #\xFF01 #\xFF02 #\xFF03 #\xFF05 #\xFF06 #\xFF07 #\xFF0A
+ #\xFF0C #\xFF0E #\xFF0F #\xFF1A #\xFF1B #\xFF1F #\xFF20
+ #\xFF3C #\xFF61 #\xFF64 #\xFF65 #\x10100 #\x10101 #\x10102
+ #\x1039F #\x103D0 #\x1056F #\x10857 #\x1091F #\x1093F
+ #\x10A50 #\x10A51 #\x10A52 #\x10A53 #\x10A54 #\x10A55
+ #\x10A56 #\x10A57 #\x10A58 #\x10A7F #\x10AF0 #\x10AF1
+ #\x10AF2 #\x10AF3 #\x10AF4 #\x10AF5 #\x10AF6 #\x10B39
+ #\x10B3A #\x10B3B #\x10B3C #\x10B3D #\x10B3E #\x10B3F
+ #\x10B99 #\x10B9A #\x10B9B #\x10B9C #\x10F55 #\x10F56
+ #\x10F57 #\x10F58 #\x10F59 #\x11047 #\x11048 #\x11049
+ #\x1104A #\x1104B #\x1104C #\x1104D #\x110BB #\x110BC
+ #\x110BE #\x110BF #\x110C0 #\x110C1 #\x11140 #\x11141
+ #\x11142 #\x11143 #\x11174 #\x11175 #\x111C5 #\x111C6
+ #\x111C7 #\x111C8 #\x111CD #\x111DB #\x111DD #\x111DE
+ #\x111DF #\x11238 #\x11239 #\x1123A #\x1123B #\x1123C
+ #\x1123D #\x112A9 #\x1144B #\x1144C #\x1144D #\x1144E
+ #\x1144F #\x1145A #\x1145B #\x1145D #\x114C6 #\x115C1
+ #\x115C2 #\x115C3 #\x115C4 #\x115C5 #\x115C6 #\x115C7
+ #\x115C8 #\x115C9 #\x115CA #\x115CB #\x115CC #\x115CD
+ #\x115CE #\x115CF #\x115D0 #\x115D1 #\x115D2 #\x115D3
+ #\x115D4 #\x115D5 #\x115D6 #\x115D7 #\x11641 #\x11642
+ #\x11643 #\x11660 #\x11661 #\x11662 #\x11663 #\x11664
+ #\x11665 #\x11666 #\x11667 #\x11668 #\x11669 #\x1166A
+ #\x1166B #\x1166C #\x1173C #\x1173D #\x1173E #\x1183B
+ #\x11944 #\x11945 #\x11946 #\x119E2 #\x11A3F #\x11A40
+ #\x11A41 #\x11A42 #\x11A43 #\x11A44 #\x11A45 #\x11A46
+ #\x11A9A #\x11A9B #\x11A9C #\x11049 #\x1104A #\x1104B
+ #\x1104C #\x1104D #\x110BB #\x110BC #\x110BE #\x110BF
+ #\x110C0 #\x110C1 #\x11140 #\x11141 #\x11142 #\x11143
+ #\x11174 #\x11175 #\x111C5 #\x111C6 #\x111C7 #\x111C8
+ #\x111CD #\x111DB #\x111DD #\x111DE #\x111DF #\x11238
+ #\x11239 #\x1123A #\x1123B #\x1123C #\x1123D #\x112A9
+ #\x1144B #\x1144C #\x1144D #\x1144E #\x1144F #\x1145A
+ #\x1145B #\x1145D #\x114C6 #\x115C1 #\x115C2 #\x115C3
+ #\x115C4 #\x115C5 #\x115C6 #\x115C7 #\x115C8 #\x115C9
+ #\x115CA #\x115CB #\x115CC #\x115CD #\x115CE #\x115CF
+ #\x115D0 #\x115D1 #\x115D2 #\x115D3 #\x115D4 #\x115D5
+ #\x115D6 #\x115D7 #\x11641 #\x11642 #\x11643 #\x11660
+ #\x11661 #\x11662 #\x11663 #\x11664 #\x11665 #\x11666
+ #\x11667 #\x11668 #\x11669 #\x1166A #\x1166B #\x1166C
+ #\x1173C #\x1173D #\x1173E #\x1183B #\x11944 #\x11945
+ #\x11946 #\x119E2 #\x11A3F #\x11A40 #\x11A41 #\x11A42
+ #\x11A43 #\x11A44 #\x11A45 #\x11A46 #\x11A9A #\x11A9B
+ #\x11A9C #\x11A9E #\x11A9F #\x11AA0 #\x11AA1 #\x11AA2
+ #\x11C41 #\x11C42 #\x11C43 #\x11C44 #\x11C45 #\x11C70
+ #\x11C71 #\x11EF7 #\x11EF8 #\x11FFF #\x12470 #\x12471
+ #\x12472 #\x12473 #\x12474 #\x16A6E #\x16A6F #\x16AF5
+ #\x16B37 #\x16B38 #\x16B39 #\x16B3A #\x16B3B #\x16B44
+ #\x16E97 #\x16E98 #\x16E99 #\x16E9A #\x16FE2 #\x1BC9F
+ #\x1DA87 #\x1DA88 #\x1DA89 #\x1DA8A #\x1DA8B #\x1E95E
+ #\x1E95F #\x11A9E #\x11A9F #\x11AA0 #\x11AA1 #\x11AA2
+ #\x11C41 #\x11C42 #\x11C43 #\x11C44 #\x11C45 #\x11C70
+ #\x11C71 #\x11EF7 #\x11EF8 #\x11FFF #\x12470 #\x12471
+ #\x12472 #\x12473 #\x12474 #\x16A6E #\x16A6F #\x16AF5
+ #\x16B37 #\x16B38 #\x16B39 #\x16B3A #\x16B3B #\x16B44
+ #\x16E97 #\x16E98 #\x16E99 #\x16E9A #\x16FE2 #\x1BC9F
+ #\x1DA87 #\x1DA88 #\x1DA89 #\x1DA8A #\x1DA8B #\x1E95E
+ #\x1E95F)
+ ;; Ps: open punctuation (e.g. U+0028 LEFT PARENTHESIS, U+007B LEFT CURLY BRACKET)
+ (char-set #\x0028 #\x005B #\x007B #\x0F3A #\x0F3C #\x169B #\x201A
+ #\x201E #\x2045 #\x207D #\x208D #\x2308 #\x230A #\x2329
+ #\x2768 #\x276A #\x276C #\x276E #\x2770 #\x2772 #\x2774
+ #\x27C5 #\x27E6 #\x27E8 #\x27EA #\x27EC #\x27EE #\x2983
+ #\x2985 #\x2987 #\x2989 #\x298B #\x298D #\x298F #\x2991
+ #\x2993 #\x2995 #\x2997 #\x29D8 #\x29DA #\x29FC #\x2E22
+ #\x2E24 #\x2E26 #\x2E28 #\x2E42 #\x3008 #\x300A #\x300C
+ #\x300E #\x3010 #\x3014 #\x3016 #\x3018 #\x301A #\x301D
+ #\xFD3F #\xFE17 #\xFE35 #\xFE37 #\xFE39 #\xFE3B #\xFE3D
+ #\xFE3F #\xFE41 #\xFE43 #\xFE47 #\xFE59 #\xFE5B #\xFE5D
+ #\xFF08 #\xFF3B #\xFF5B #\xFF5F #\xFF62)))))
diff --git a/tests/parse.scm b/tests/parse.scm
index bd7e494..4e036b3 100644
--- a/tests/parse.scm
+++ b/tests/parse.scm
@@ -252,7 +252,7 @@
(test "testSingleWordTimerWithPunctuation"
'(recipe (metadata ())
- ((step ("Let it " (timer "rest" #f) ", after plating"))))
+ ((step ("Let it " (timer "rest" #f) ", then serve")))) ; the timer name stops at the comma; the tail is the input text after ~rest
(cook->list (parse-cook "Let it ~rest, then serve\n")))
(test "testSingleWordTimerWithUnicodePunctuation"
@@ -283,7 +283,7 @@
(test "testSingleWordIngredientWithUnicodePunctuation"
'(recipe (metadata ())
- ((step ("Add some " (ingredient "chilli" #f) "⸫ then bake"))))
+ ((step ("Add " (ingredient "chilli" #f) "⸫ then bake")))) ; leading text matches the input below; the ingredient name stops before the Unicode punctuation mark
(cook->list (parse-cook "Add @chilli⸫ then bake\n")))
; NOTE: the space is U+2009