From ee5c94e203aac4f5974f88f16616d525d92d737d Mon Sep 17 00:00:00 2001 From: Divide-By-0 Date: Tue, 12 Dec 2023 15:43:28 -0500 Subject: [PATCH] add comments for regex and substrings --- package.json | 6 +- .../circuits/common/email_addr_regex.circom | 2 + .../circuits/common/email_domain_regex.circom | 2 + .../circuits/common/message_id_regex.circom | 2 + .../circom/tests/circuits/simple_regex.circom | 2 + .../circuits/simple_regex_decomposed.circom | 194 ++++-- .../circuits/simple_regex_decomposed.json | 2 +- .../tests/circuits/test_simple_regex.circom | 1 + .../test_simple_regex_decomposed.circom | 2 +- packages/compiler/src/circom.rs | 4 +- packages/compiler/src/gen_circom.ts | 641 +++++++++--------- packages/compiler/src/js_caller.rs | 4 +- packages/compiler/src/lib.rs | 38 +- packages/compiler/src/node.rs | 32 +- 14 files changed, 523 insertions(+), 409 deletions(-) diff --git a/package.json b/package.json index 5f97868..08dd9eb 100644 --- a/package.json +++ b/package.json @@ -8,10 +8,10 @@ "packages/*" ], "contributors": [ - "Javier Su ", - "Kata Choi ", "Sora Suegami ", - "Yush G " + "Yush G ", + "Javier Su ", + "Kata Choi " ], "scripts": { "install": "yarn workspaces -pt run install", diff --git a/packages/circom/circuits/common/email_addr_regex.circom b/packages/circom/circuits/common/email_addr_regex.circom index 9a029d5..633015a 100644 --- a/packages/circom/circuits/common/email_addr_regex.circom +++ b/packages/circom/circuits/common/email_addr_regex.circom @@ -2,6 +2,7 @@ pragma circom 2.1.5; include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom"; +// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+ template EmailAddrRegex(msg_bytes) { signal input msg[msg_bytes]; signal output out; @@ -248,6 +249,7 @@ template EmailAddrRegex(msg_bytes) { is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0]; } + // substrings calculated: [{(1, 2), (1, 1), (0, 1), (3, 3), (2, 3)}] signal is_substr0[msg_bytes][6]; signal is_reveal0[msg_bytes]; signal output reveal0[msg_bytes]; diff --git a/packages/circom/circuits/common/email_domain_regex.circom b/packages/circom/circuits/common/email_domain_regex.circom index 20f3cc5..38f05c6 100644 --- a/packages/circom/circuits/common/email_domain_regex.circom +++ b/packages/circom/circuits/common/email_domain_regex.circom @@ -2,6 +2,7 @@ pragma circom 2.1.5; include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom"; +// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+ template EmailDomainRegex(msg_bytes) { signal input msg[msg_bytes]; signal output out; @@ -248,6 +249,7 @@ template EmailDomainRegex(msg_bytes) { is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0]; } + // substrings calculated: [{(3, 3), (2, 3)}] signal is_substr0[msg_bytes][3]; signal is_reveal0[msg_bytes]; signal output reveal0[msg_bytes]; diff --git a/packages/circom/circuits/common/message_id_regex.circom b/packages/circom/circuits/common/message_id_regex.circom index f89299e..e521d52 100644 --- a/packages/circom/circuits/common/message_id_regex.circom +++ b/packages/circom/circuits/common/message_id_regex.circom @@ -2,6 +2,7 @@ pragma circom 2.1.5; include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom"; +// regex: (( \n)|^)message-id:<(=|@|\.|\+|_|-|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9)+> \n template MessageIdRegex(msg_bytes) { signal input msg[msg_bytes]; signal output out; @@ -270,6 +271,7 @@ template MessageIdRegex(msg_bytes) { is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][7] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0]; } + // substrings calculated: [{(17, 18), (1, 4), (1, 1), (18, 1)}] signal is_substr0[msg_bytes][5]; signal is_reveal0[msg_bytes]; signal output reveal0[msg_bytes]; diff --git a/packages/circom/tests/circuits/simple_regex.circom b/packages/circom/tests/circuits/simple_regex.circom index f24bced..3f499c1 100644 --- a/packages/circom/tests/circuits/simple_regex.circom +++ b/packages/circom/tests/circuits/simple_regex.circom @@ -2,6 +2,7 @@ pragma circom 2.1.5; include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom"; +// regex: 1=(a|b) (2=(b|c)+ )+d template SimpleRegex(msg_bytes) { signal input msg[msg_bytes]; signal output out; @@ -128,6 +129,7 @@ template SimpleRegex(msg_bytes) { is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][9] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0]; } + // substrings calculated: [{(2, 3)}, {(6, 7), (7, 7)}, {(8, 9)}] signal is_substr0[msg_bytes][2]; signal is_reveal0[msg_bytes]; signal output reveal0[msg_bytes]; diff --git a/packages/circom/tests/circuits/simple_regex_decomposed.circom b/packages/circom/tests/circuits/simple_regex_decomposed.circom index 5789570..7c0ac38 100644 --- a/packages/circom/tests/circuits/simple_regex_decomposed.circom +++ b/packages/circom/tests/circuits/simple_regex_decomposed.circom @@ -2,6 +2,7 @@ pragma circom 2.1.5; include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom"; +// regex: email was meant for @[a-zA-Z0-9_]+. template SimpleRegexDecomposed(msg_bytes) { signal input msg[msg_bytes]; signal output out; @@ -13,7 +14,7 @@ template SimpleRegexDecomposed(msg_bytes) { in[i+1] <== msg[i]; } - component eq[15][num_bytes]; + component eq[26][num_bytes]; component lt[4][num_bytes]; component and[26][num_bytes]; component multi_or[2][num_bytes]; @@ -45,11 +46,55 @@ template SimpleRegexDecomposed(msg_bytes) { and[1][i] = AND(); and[1][i].a <== lt[2][i].out; and[1][i].b <== lt[3][i].out; + eq[0][i] = IsEqual(); + eq[0][i].in[0] <== in[i]; + eq[0][i].in[1] <== 48; + eq[1][i] = IsEqual(); + eq[1][i].in[0] <== in[i]; + eq[1][i].in[1] <== 49; + eq[2][i] = IsEqual(); + eq[2][i].in[0] <== in[i]; + eq[2][i].in[1] <== 50; + eq[3][i] = IsEqual(); + eq[3][i].in[0] <== in[i]; + eq[3][i].in[1] <== 51; + eq[4][i] = IsEqual(); + eq[4][i].in[0] <== in[i]; + eq[4][i].in[1] <== 52; + eq[5][i] = IsEqual(); + eq[5][i].in[0] <== in[i]; + eq[5][i].in[1] <== 53; + eq[6][i] = IsEqual(); + eq[6][i].in[0] <== in[i]; + eq[6][i].in[1] <== 54; + eq[7][i] = IsEqual(); + eq[7][i].in[0] <== in[i]; + eq[7][i].in[1] <== 55; + eq[8][i] = IsEqual(); + eq[8][i].in[0] <== in[i]; + eq[8][i].in[1] <== 56; + eq[9][i] = IsEqual(); + eq[9][i].in[0] <== in[i]; + eq[9][i].in[1] <== 57; + eq[10][i] = IsEqual(); + eq[10][i].in[0] <== in[i]; + eq[10][i].in[1] <== 95; and[2][i] = AND(); and[2][i].a <== states[i][1]; - multi_or[0][i] = MultiOR(2); + multi_or[0][i] = MultiOR(13); multi_or[0][i].in[0] <== and[0][i].out; multi_or[0][i].in[1] <== and[1][i].out; + multi_or[0][i].in[2] <== eq[0][i].out; + multi_or[0][i].in[3] <== eq[1][i].out; + multi_or[0][i].in[4] <== eq[2][i].out; + multi_or[0][i].in[5] <== eq[3][i].out; + multi_or[0][i].in[6] <== eq[4][i].out; + multi_or[0][i].in[7] <== eq[5][i].out; + multi_or[0][i].in[8] <== eq[6][i].out; + multi_or[0][i].in[9] <== eq[7][i].out; + multi_or[0][i].in[10] <== eq[8][i].out; + multi_or[0][i].in[11] <== eq[9][i].out; + multi_or[0][i].in[12] <== eq[10][i].out; and[2][i].b <== multi_or[0][i].out; and[3][i] = AND(); and[3][i].a <== states[i][23]; @@ -59,159 +104,159 @@ template SimpleRegexDecomposed(msg_bytes) { multi_or[1][i].in[1] <== and[3][i].out; states[i+1][1] <== multi_or[1][i].out; state_changed[i].in[0] <== states[i+1][1]; - eq[0][i] = IsEqual(); - eq[0][i].in[0] <== in[i]; - eq[0][i].in[1] <== 101; + eq[11][i] = IsEqual(); + eq[11][i].in[0] <== in[i]; + eq[11][i].in[1] <== 101; and[4][i] = AND(); and[4][i].a <== states[i][0]; - and[4][i].b <== eq[0][i].out; + and[4][i].b <== eq[11][i].out; states[i+1][2] <== and[4][i].out; state_changed[i].in[1] <== states[i+1][2]; - eq[1][i] = IsEqual(); - eq[1][i].in[0] <== in[i]; - eq[1][i].in[1] <== 46; + eq[12][i] = IsEqual(); + eq[12][i].in[0] <== in[i]; + eq[12][i].in[1] <== 109; and[5][i] = AND(); - and[5][i].a <== states[i][1]; - and[5][i].b <== eq[1][i].out; + and[5][i].a <== states[i][2]; + and[5][i].b <== eq[12][i].out; states[i+1][3] <== and[5][i].out; state_changed[i].in[2] <== states[i+1][3]; - eq[2][i] = IsEqual(); - eq[2][i].in[0] <== in[i]; - eq[2][i].in[1] <== 109; + eq[13][i] = IsEqual(); + eq[13][i].in[0] <== in[i]; + eq[13][i].in[1] <== 46; and[6][i] = AND(); - and[6][i].a <== states[i][2]; - and[6][i].b <== eq[2][i].out; + and[6][i].a <== states[i][1]; + and[6][i].b <== eq[13][i].out; states[i+1][4] <== and[6][i].out; state_changed[i].in[3] <== states[i+1][4]; - eq[3][i] = IsEqual(); - eq[3][i].in[0] <== in[i]; - eq[3][i].in[1] <== 97; + eq[14][i] = IsEqual(); + eq[14][i].in[0] <== in[i]; + eq[14][i].in[1] <== 97; and[7][i] = AND(); - and[7][i].a <== states[i][4]; - and[7][i].b <== eq[3][i].out; + and[7][i].a <== states[i][3]; + and[7][i].b <== eq[14][i].out; states[i+1][5] <== and[7][i].out; state_changed[i].in[4] <== states[i+1][5]; - eq[4][i] = IsEqual(); - eq[4][i].in[0] <== in[i]; - eq[4][i].in[1] <== 105; + eq[15][i] = IsEqual(); + eq[15][i].in[0] <== in[i]; + eq[15][i].in[1] <== 105; and[8][i] = AND(); and[8][i].a <== states[i][5]; - and[8][i].b <== eq[4][i].out; + and[8][i].b <== eq[15][i].out; states[i+1][6] <== and[8][i].out; state_changed[i].in[5] <== states[i+1][6]; - eq[5][i] = IsEqual(); - eq[5][i].in[0] <== in[i]; - eq[5][i].in[1] <== 108; + eq[16][i] = IsEqual(); + eq[16][i].in[0] <== in[i]; + eq[16][i].in[1] <== 108; and[9][i] = AND(); and[9][i].a <== states[i][6]; - and[9][i].b <== eq[5][i].out; + and[9][i].b <== eq[16][i].out; states[i+1][7] <== and[9][i].out; state_changed[i].in[6] <== states[i+1][7]; - eq[6][i] = IsEqual(); - eq[6][i].in[0] <== in[i]; - eq[6][i].in[1] <== 32; + eq[17][i] = IsEqual(); + eq[17][i].in[0] <== in[i]; + eq[17][i].in[1] <== 32; and[10][i] = AND(); and[10][i].a <== states[i][7]; - and[10][i].b <== eq[6][i].out; + and[10][i].b <== eq[17][i].out; states[i+1][8] <== and[10][i].out; state_changed[i].in[7] <== states[i+1][8]; - eq[7][i] = IsEqual(); - eq[7][i].in[0] <== in[i]; - eq[7][i].in[1] <== 119; + eq[18][i] = IsEqual(); + eq[18][i].in[0] <== in[i]; + eq[18][i].in[1] <== 119; and[11][i] = AND(); and[11][i].a <== states[i][8]; - and[11][i].b <== eq[7][i].out; + and[11][i].b <== eq[18][i].out; states[i+1][9] <== and[11][i].out; state_changed[i].in[8] <== states[i+1][9]; and[12][i] = AND(); and[12][i].a <== states[i][9]; - and[12][i].b <== eq[3][i].out; + and[12][i].b <== eq[14][i].out; states[i+1][10] <== and[12][i].out; state_changed[i].in[9] <== states[i+1][10]; - eq[8][i] = IsEqual(); - eq[8][i].in[0] <== in[i]; - eq[8][i].in[1] <== 115; + eq[19][i] = IsEqual(); + eq[19][i].in[0] <== in[i]; + eq[19][i].in[1] <== 115; and[13][i] = AND(); and[13][i].a <== states[i][10]; - and[13][i].b <== eq[8][i].out; + and[13][i].b <== eq[19][i].out; states[i+1][11] <== and[13][i].out; state_changed[i].in[10] <== states[i+1][11]; and[14][i] = AND(); and[14][i].a <== states[i][11]; - and[14][i].b <== eq[6][i].out; + and[14][i].b <== eq[17][i].out; states[i+1][12] <== and[14][i].out; state_changed[i].in[11] <== states[i+1][12]; and[15][i] = AND(); and[15][i].a <== states[i][12]; - and[15][i].b <== eq[2][i].out; + and[15][i].b <== eq[12][i].out; states[i+1][13] <== and[15][i].out; state_changed[i].in[12] <== states[i+1][13]; and[16][i] = AND(); and[16][i].a <== states[i][13]; - and[16][i].b <== eq[0][i].out; + and[16][i].b <== eq[11][i].out; states[i+1][14] <== and[16][i].out; state_changed[i].in[13] <== states[i+1][14]; and[17][i] = AND(); and[17][i].a <== states[i][14]; - and[17][i].b <== eq[3][i].out; + and[17][i].b <== eq[14][i].out; states[i+1][15] <== and[17][i].out; state_changed[i].in[14] <== states[i+1][15]; - eq[9][i] = IsEqual(); - eq[9][i].in[0] <== in[i]; - eq[9][i].in[1] <== 110; + eq[20][i] = IsEqual(); + eq[20][i].in[0] <== in[i]; + eq[20][i].in[1] <== 110; and[18][i] = AND(); and[18][i].a <== states[i][15]; - and[18][i].b <== eq[9][i].out; + and[18][i].b <== eq[20][i].out; states[i+1][16] <== and[18][i].out; state_changed[i].in[15] <== states[i+1][16]; - eq[10][i] = IsEqual(); - eq[10][i].in[0] <== in[i]; - eq[10][i].in[1] <== 116; + eq[21][i] = IsEqual(); + eq[21][i].in[0] <== in[i]; + eq[21][i].in[1] <== 116; and[19][i] = AND(); and[19][i].a <== states[i][16]; - and[19][i].b <== eq[10][i].out; + and[19][i].b <== eq[21][i].out; states[i+1][17] <== and[19][i].out; state_changed[i].in[16] <== states[i+1][17]; and[20][i] = AND(); and[20][i].a <== states[i][17]; - and[20][i].b <== eq[6][i].out; + and[20][i].b <== eq[17][i].out; states[i+1][18] <== and[20][i].out; state_changed[i].in[17] <== states[i+1][18]; - eq[11][i] = IsEqual(); - eq[11][i].in[0] <== in[i]; - eq[11][i].in[1] <== 102; + eq[22][i] = IsEqual(); + eq[22][i].in[0] <== in[i]; + eq[22][i].in[1] <== 102; and[21][i] = AND(); and[21][i].a <== states[i][18]; - and[21][i].b <== eq[11][i].out; + and[21][i].b <== eq[22][i].out; states[i+1][19] <== and[21][i].out; state_changed[i].in[18] <== states[i+1][19]; - eq[12][i] = IsEqual(); - eq[12][i].in[0] <== in[i]; - eq[12][i].in[1] <== 111; + eq[23][i] = IsEqual(); + eq[23][i].in[0] <== in[i]; + eq[23][i].in[1] <== 111; and[22][i] = AND(); and[22][i].a <== states[i][19]; - and[22][i].b <== eq[12][i].out; + and[22][i].b <== eq[23][i].out; states[i+1][20] <== and[22][i].out; state_changed[i].in[19] <== states[i+1][20]; - eq[13][i] = IsEqual(); - eq[13][i].in[0] <== in[i]; - eq[13][i].in[1] <== 114; + eq[24][i] = IsEqual(); + eq[24][i].in[0] <== in[i]; + eq[24][i].in[1] <== 114; and[23][i] = AND(); and[23][i].a <== states[i][20]; - and[23][i].b <== eq[13][i].out; + and[23][i].b <== eq[24][i].out; states[i+1][21] <== and[23][i].out; state_changed[i].in[20] <== states[i+1][21]; and[24][i] = AND(); and[24][i].a <== states[i][21]; - and[24][i].b <== eq[6][i].out; + and[24][i].b <== eq[17][i].out; states[i+1][22] <== and[24][i].out; state_changed[i].in[21] <== states[i+1][22]; - eq[14][i] = IsEqual(); - eq[14][i].in[0] <== in[i]; - eq[14][i].in[1] <== 64; + eq[25][i] = IsEqual(); + eq[25][i].in[0] <== in[i]; + eq[25][i].in[1] <== 64; and[25][i] = AND(); and[25][i].a <== states[i][22]; - and[25][i].b <== eq[14][i].out; + and[25][i].b <== eq[25][i].out; states[i+1][23] <== and[25][i].out; state_changed[i].in[22] <== states[i+1][23]; states[i+1][0] <== 1 - state_changed[i].out; @@ -219,16 +264,17 @@ template SimpleRegexDecomposed(msg_bytes) { component final_state_result = MultiOR(num_bytes+1); for (var i = 0; i <= num_bytes; i++) { - final_state_result.in[i] <== states[i][3]; + final_state_result.in[i] <== states[i][4]; } out <== final_state_result.out; signal is_consecutive[msg_bytes+1][2]; is_consecutive[msg_bytes][1] <== 1; for (var i = 0; i < msg_bytes; i++) { - is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; + is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][4] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1]; is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0]; } + // substrings calculated: [{(1, 1), (23, 1)}] signal is_substr0[msg_bytes][3]; signal is_reveal0[msg_bytes]; signal output reveal0[msg_bytes]; diff --git a/packages/circom/tests/circuits/simple_regex_decomposed.json b/packages/circom/tests/circuits/simple_regex_decomposed.json index f149462..637cb71 100644 --- a/packages/circom/tests/circuits/simple_regex_decomposed.json +++ b/packages/circom/tests/circuits/simple_regex_decomposed.json @@ -6,7 +6,7 @@ }, { "is_public": true, - "regex_def": "[a-zA-Z]+" + "regex_def": "[a-zA-Z0-9_]+" }, { "is_public": false, diff --git a/packages/circom/tests/circuits/test_simple_regex.circom b/packages/circom/tests/circuits/test_simple_regex.circom index 2d3cda4..e5563e1 100644 --- a/packages/circom/tests/circuits/test_simple_regex.circom +++ b/packages/circom/tests/circuits/test_simple_regex.circom @@ -1,3 +1,4 @@ +pragma circom 2.1.5; include "./simple_regex.circom"; // 1=(a|b) (2=(b|c)+ )+d component main = SimpleRegex(64); \ No newline at end of file diff --git a/packages/circom/tests/circuits/test_simple_regex_decomposed.circom b/packages/circom/tests/circuits/test_simple_regex_decomposed.circom index 0b85f6c..500c362 100644 --- a/packages/circom/tests/circuits/test_simple_regex_decomposed.circom +++ b/packages/circom/tests/circuits/test_simple_regex_decomposed.circom @@ -1,3 +1,3 @@ include "./simple_regex_decomposed.circom"; -// email was meant for @[a-zA-Z]+\. +// email was meant for @[a-zA-Z0-9_]+\. component main = SimpleRegexDecomposed(64); \ No newline at end of file diff --git a/packages/compiler/src/circom.rs b/packages/compiler/src/circom.rs index 1dbd5ff..7d94ce1 100644 --- a/packages/compiler/src/circom.rs +++ b/packages/compiler/src/circom.rs @@ -21,8 +21,7 @@ impl RegexAndDFA { template_name: &str, gen_substrs: bool, ) -> Result<(), CompilerError> { - // let all_regex = String::new(); - let circom = gen_circom_allstr(&self.dfa_val, template_name)?; + let circom = gen_circom_allstr(&self.dfa_val, template_name, &self.regex_str)?; if gen_substrs { self.add_substrs_constraints(circom_path, circom)?; } else { @@ -49,6 +48,7 @@ impl RegexAndDFA { circom += "\t}\n"; let substr_defs_array = &self.substrs_defs.substr_defs_array; + circom += &format!("\t// substrings calculated: {:?}\n", &self.substrs_defs.substr_defs_array); for (idx, defs) in substr_defs_array.into_iter().enumerate() { let num_defs = defs.len(); circom += &format!("\tsignal is_substr{}[msg_bytes][{}];\n", idx, num_defs + 1); diff --git a/packages/compiler/src/gen_circom.ts b/packages/compiler/src/gen_circom.ts index b06854c..164a693 100644 --- a/packages/compiler/src/gen_circom.ts +++ b/packages/compiler/src/gen_circom.ts @@ -3,323 +3,354 @@ type Graph = { edges: Record; }[]; -function genCircomAllstr(graph_json: Graph, template_name: string): string { - const N: number = graph_json.length; - // console.log(JSON.stringify(graph_json, null, 2)); - // const graph = Array(N).fill({}); - const rev_graph: Record> = {}; - const to_init_graph: number[][] = []; - let init_going_state: number | null = null; - - for (let i = 0; i < N; i++) { - rev_graph[i] = {}; - to_init_graph.push([]); - } - - const accept_nodes: Set = new Set(); - for (let i = 0; i < N; i++) { - const node = graph_json[i]; - for (let k in node.edges) { - const v: number = node.edges[k]; - rev_graph[v][i] = Array.from(JSON.parse(k)).map(c => (c as string).charCodeAt(0)); - if (i === 0) { - const index = rev_graph[v][i].indexOf(94); - if (index !== -1) { - init_going_state = v; - rev_graph[v][i][index] = 255; - } - for (let j = 0; j < rev_graph[v][i].length; j++) { - if (rev_graph[v][i][j] == 255) { - continue; - } - to_init_graph[v].push(rev_graph[v][i][j]); - } - } +function genCircomAllstr( + graph_json: Graph, + template_name: string, + regex_str = "" +): string { + /** + * This function generates a Circom circuit from a given graph_json, template_name, and regex_str. + * @param {Object} graph_json - The graph in JSON format. + * @param {string} template_name - The name to be used for the Circom template. + * @param {string} regex_str - The regular expression string, used only to print in a comment at the top. + */ + + const N = graph_json.length; + // console.log(JSON.stringify(graph_json, null, 2)); + // const graph = Array(N).fill({}); + const rev_graph: Record> = {}; + const to_init_graph: number[][] = []; + let init_going_state: number | null = null; + + for (let i = 0; i < N; i++) { + rev_graph[i] = {}; + to_init_graph.push([]); + } + + const accept_nodes: Set = new Set(); + for (let i = 0; i < N; i++) { + const node = graph_json[i]; + for (let k in node.edges) { + const v: number = node.edges[k]; + rev_graph[v][i] = Array.from(JSON.parse(k)).map((c) => + (c as string).charCodeAt(0) + ); + if (i === 0) { + const index = rev_graph[v][i].indexOf(94); + if (index !== -1) { + init_going_state = v; + rev_graph[v][i][index] = 255; } - if (node.type == "accept") { - accept_nodes.add(i); - } - } - - if (init_going_state !== null) { - for (const [going_state, chars] of Object.entries(to_init_graph)) { - const going_state_num = Number(going_state); - if (chars.length === 0) { - continue; - } - if (rev_graph[going_state_num][init_going_state] == null) { - rev_graph[going_state_num][init_going_state] = []; - } - rev_graph[going_state_num][init_going_state] = rev_graph[going_state_num][init_going_state].concat(chars); - + for (let j = 0; j < rev_graph[v][i].length; j++) { + if (rev_graph[v][i][j] == 255) { + continue; + } + to_init_graph[v].push(rev_graph[v][i][j]); } + } } - - if (accept_nodes.size === 0) { - throw new Error("accept node must exist"); + if (node.type == "accept") { + accept_nodes.add(i); } - const accept_nodes_array = [...accept_nodes]; - if (accept_nodes_array.length !== 1) { - throw new Error("the size of accept nodes must be one"); + } + + if (init_going_state !== null) { + for (const [going_state, chars] of Object.entries(to_init_graph)) { + const going_state_num = Number(going_state); + if (chars.length === 0) { + continue; + } + if (rev_graph[going_state_num][init_going_state] == null) { + rev_graph[going_state_num][init_going_state] = []; + } + rev_graph[going_state_num][init_going_state] = + rev_graph[going_state_num][init_going_state].concat(chars); } - - let eq_i: number = 0; - let lt_i: number = 0; - let and_i: number = 0; - let multi_or_i: number = 0; - - const range_checks: number[][][] = new Array(256); - for (let i = 0; i < 256; i++) { - range_checks[i] = new Array(256); - } - const eq_checks: number[] = new Array(256); - const multi_or_checks1: Record = {}; - const multi_or_checks2: Record = {}; - - let lines: string[] = []; - lines.push(`\tfor (var i = 0; i < num_bytes; i++) {`); - - // const uppercase = new Set(Array.from("ABCDEFGHIJKLMNOPQRSTUVWXYZ").map(c => c.charCodeAt())); - // const lowercase = new Set(Array.from("abcdefghijklmnopqrstuvwxyz").map(c => c.charCodeAt())); - // const digits = new Set(Array.from("0123456789").map(c => c.charCodeAt())); - // const symbols1 = new Set([":", ";", "<", "=", ">", "?", "@"].map(c => c.charCodeAt())); - // const symbols2 = new Set(["[", "\\", "]", "^", "_", "`"].map(c => c.charCodeAt())); - // const symbols3 = new Set(["{", "|", "}", "~"].map(c => c.charCodeAt())); - lines.push(`\t\tstate_changed[i] = MultiOR(${N - 1});`); - - for (let i = 1; i < N; i++) { - const outputs: number[] = []; - // let is_negates = []; - for (const prev_i of Object.keys(rev_graph[i])) { - const prev_i_num = Number(prev_i); - const k = rev_graph[i][prev_i_num]; - k.sort((a, b) => Number(a) - Number(b)); - const eq_outputs: [string, number][] = []; - let vals: Set = new Set(k); - // let is_negate = false; - // if (vals.has(0xff)) { - // vals.delete(0xff); - // is_negate = true; - // } - if (vals.size === 0) { - continue; - } - // if (is_negate === true) { - // for (let another_i = 1; another_i < N; another_i++) { - // if (i === another_i) { - // continue; - // } - // if (rev_graph[another_i][prev_i] === null) { - // continue; - // } - // const another_vals = new Set(rev_graph[another_i][prev_i]); - // if (another_vals.size === 0) { - // continue; - // } - // for (let another_val of another_vals) { - // vals.add(another_val); - // } - // } - // } - const min_maxes: [number, number][] = []; - let cur_min: number = k[0]; - let cur_max: number = k[0]; - - for (let idx = 1; idx < k.length; ++idx) { - if (cur_max === k[idx]) { - continue; - } - else if (cur_max + 1 === k[idx]) { - cur_max += 1; - } else { - if (cur_max - cur_min >= 16) { - min_maxes.push([cur_min, cur_max]); - } - cur_min = k[idx]; - cur_max = k[idx]; - } - } - - if (cur_max - cur_min >= 16) { - min_maxes.push([cur_min, cur_max]); - } - for (const min_max of min_maxes) { - for (let code = min_max[0]; code <= min_max[1]; ++code) { - vals.delete(code); - } - } - - // for (let subsets of [ - // [digits, 47, 58], - // [symbols1, 57, 65], - // [uppercase, 64, 91], - // [symbols2, 90, 97], - // [lowercase, 96, 123], - // [symbols3, 122, 127] - // ]) { - // const subset = subsets[0]; - // const min = subsets[1]; - // const max = subsets[2]; - // if (vals.isSuperset(subset)) { - // vals.difference(subset); - // if (min_maxs.length == 0) { - // min_maxs.push([min, max]); - // } else { - // const last = min_maxs[min_maxs.length - 1]; - // if (last[1] - 1 == min) { - // min_maxs[min_maxs.length - 1][1] = max; - // } else { - // min_maxs.push([min, max]); - // } - // } - // } - // } - - for (let min_max of min_maxes) { - const min: number = min_max[0]; - const max: number = min_max[1]; - if (range_checks[min][max] === undefined) { - lines.push(`\t\tlt[${lt_i}][i] = LessEqThan(8);`); - lines.push(`\t\tlt[${lt_i}][i].in[0] <== ${min};`); - lines.push(`\t\tlt[${lt_i}][i].in[1] <== in[i];`); - - lines.push(`\t\tlt[${lt_i + 1}][i] = LessEqThan(8);`); - lines.push(`\t\tlt[${lt_i + 1}][i].in[0] <== in[i];`); - lines.push(`\t\tlt[${lt_i + 1}][i].in[1] <== ${max};`); - - lines.push(`\t\tand[${and_i}][i] = AND();`); - lines.push(`\t\tand[${and_i}][i].a <== lt[${lt_i}][i].out;`); - lines.push(`\t\tand[${and_i}][i].b <== lt[${lt_i + 1}][i].out;`); - - eq_outputs.push(['and', and_i]); - range_checks[min][max] = [lt_i, and_i]; - lt_i += 2; - and_i += 1; - } else { - let [_, and_i] = range_checks[min][max]; - eq_outputs.push(['and', and_i]); - } - - } - for (let code of vals) { - if (eq_checks[code] === undefined) { - lines.push(`\t\teq[${eq_i}][i] = IsEqual();`); - lines.push(`\t\teq[${eq_i}][i].in[0] <== in[i];`); - lines.push(`\t\teq[${eq_i}][i].in[1] <== ${code};`); - eq_outputs.push(['eq', eq_i]); - eq_checks[code] = eq_i; - eq_i += 1; - } else { - eq_outputs.push(['eq', eq_checks[code]]); - } - } - - lines.push(`\t\tand[${and_i}][i] = AND();`); - lines.push(`\t\tand[${and_i}][i].a <== states[i][${prev_i}];`); - if (eq_outputs.length === 1) { - // if (is_negate) { - // lines.push(`\t\tand[${and_i}][i].b <== 1 - ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`); - // } else { - // lines.push(`\t\tand[${and_i}][i].b <== ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`); - // } - lines.push(`\t\tand[${and_i}][i].b <== ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`); - } else if (eq_outputs.length > 1) { - const eq_outputs_key: string = JSON.stringify(eq_outputs); - if (multi_or_checks1[eq_outputs_key] === undefined) { - lines.push(`\t\tmulti_or[${multi_or_i}][i] = MultiOR(${eq_outputs.length});`); - for (let output_i = 0; output_i < eq_outputs.length; output_i++) { - lines.push(`\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== ${eq_outputs[output_i][0]}[${eq_outputs[output_i][1]}][i].out;`); - } - // if (is_negate) { - // lines.push(`\t\tand[${and_i}][i].b <== 1 - multi_or[${multi_or_i}][i].out;`); - // } else { - // lines.push(`\t\tand[${and_i}][i].b <== multi_or[${multi_or_i}][i].out;`); - // } - lines.push(`\t\tand[${and_i}][i].b <== multi_or[${multi_or_i}][i].out;`); - multi_or_checks1[eq_outputs_key] = multi_or_i; - multi_or_i += 1 - } else { - lines.push(`\t\tand[${and_i}][i].b <== multi_or[${multi_or_checks1[eq_outputs_key]}][i].out;`); - } - } - - outputs.push(and_i); - and_i += 1; + } + + if (accept_nodes.size === 0) { + throw new Error("accept node must exist"); + } + const accept_nodes_array = [...accept_nodes]; + if (accept_nodes_array.length !== 1) { + throw new Error("the size of accept nodes must be one"); + } + + let eq_i: number = 0; + let lt_i: number = 0; + let and_i: number = 0; + let multi_or_i: number = 0; + + const range_checks: number[][][] = new Array(256); + for (let i = 0; i < 256; i++) { + range_checks[i] = new Array(256); + } + const eq_checks: number[] = new Array(256); + const multi_or_checks1: Record = {}; + const multi_or_checks2: Record = {}; + + let lines: string[] = []; + lines.push(`\tfor (var i = 0; i < num_bytes; i++) {`); + + // const uppercase = new Set(Array.from("ABCDEFGHIJKLMNOPQRSTUVWXYZ").map(c => c.charCodeAt())); + // const lowercase = new Set(Array.from("abcdefghijklmnopqrstuvwxyz").map(c => c.charCodeAt())); + // const digits = new Set(Array.from("0123456789").map(c => c.charCodeAt())); + // const symbols1 = new Set([":", ";", "<", "=", ">", "?", "@"].map(c => c.charCodeAt())); + // const symbols2 = new Set(["[", "\\", "]", "^", "_", "`"].map(c => c.charCodeAt())); + // const symbols3 = new Set(["{", "|", "}", "~"].map(c => c.charCodeAt())); + lines.push(`\t\tstate_changed[i] = MultiOR(${N - 1});`); + + for (let i = 1; i < N; i++) { + const outputs: number[] = []; + // let is_negates = []; + for (const prev_i of Object.keys(rev_graph[i])) { + const prev_i_num = Number(prev_i); + const k = rev_graph[i][prev_i_num]; + k.sort((a, b) => Number(a) - Number(b)); + const eq_outputs: [string, number][] = []; + let vals: Set = new Set(k); + // let is_negate = false; + // if (vals.has(0xff)) { + // vals.delete(0xff); + // is_negate = true; + // } + if (vals.size === 0) { + continue; + } + // if (is_negate === true) { + // for (let another_i = 1; another_i < N; another_i++) { + // if (i === another_i) { + // continue; + // } + // if (rev_graph[another_i][prev_i] === null) { + // continue; + // } + // const another_vals = new Set(rev_graph[another_i][prev_i]); + // if (another_vals.size === 0) { + // continue; + // } + // for (let another_val of another_vals) { + // vals.add(another_val); + // } + // } + // } + const min_maxes: [number, number][] = []; + let cur_min: number = k[0]; + let cur_max: number = k[0]; + + for (let idx = 1; idx < k.length; ++idx) { + if (cur_max === k[idx]) { + continue; + } else if (cur_max + 1 === k[idx]) { + cur_max += 1; + } else { + if (cur_max - cur_min >= 16) { + min_maxes.push([cur_min, cur_max]); + } + cur_min = k[idx]; + cur_max = k[idx]; } - - if (outputs.length === 1) { - lines.push(`\t\tstates[i+1][${i}] <== and[${outputs[0]}][i].out;`); - } else if (outputs.length > 1) { - const outputs_key: string = JSON.stringify(outputs); - if (multi_or_checks2[outputs_key] === undefined) { - lines.push(`\t\tmulti_or[${multi_or_i}][i] = MultiOR(${outputs.length});`); - for (let output_i = 0; output_i < outputs.length; output_i++) { - lines.push(`\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== and[${outputs[output_i]}][i].out;`); - } - lines.push(`\t\tstates[i+1][${i}] <== multi_or[${multi_or_i}][i].out;`); - multi_or_checks2[outputs_key] = multi_or_i; - multi_or_i += 1; - } else { - lines.push(`\t\tstates[i+1][${i}] <== multi_or[${multi_or_checks2[outputs_key]}][i].out;`); - } + } + + if (cur_max - cur_min >= 16) { + min_maxes.push([cur_min, cur_max]); + } + for (const min_max of min_maxes) { + for (let code = min_max[0]; code <= min_max[1]; ++code) { + vals.delete(code); + } + } + + // for (let subsets of [ + // [digits, 47, 58], + // [symbols1, 57, 65], + // [uppercase, 64, 91], + // [symbols2, 90, 97], + // [lowercase, 96, 123], + // [symbols3, 122, 127] + // ]) { + // const subset = subsets[0]; + // const min = subsets[1]; + // const max = subsets[2]; + // if (vals.isSuperset(subset)) { + // vals.difference(subset); + // if (min_maxs.length == 0) { + // min_maxs.push([min, max]); + // } else { + // const last = min_maxs[min_maxs.length - 1]; + // if (last[1] - 1 == min) { + // min_maxs[min_maxs.length - 1][1] = max; + // } else { + // min_maxs.push([min, max]); + // } + // } + // } + // } + + for (let min_max of min_maxes) { + const min: number = min_max[0]; + const max: number = min_max[1]; + if (range_checks[min][max] === undefined) { + lines.push(`\t\tlt[${lt_i}][i] = LessEqThan(8);`); + lines.push(`\t\tlt[${lt_i}][i].in[0] <== ${min};`); + lines.push(`\t\tlt[${lt_i}][i].in[1] <== in[i];`); + + lines.push(`\t\tlt[${lt_i + 1}][i] = LessEqThan(8);`); + lines.push(`\t\tlt[${lt_i + 1}][i].in[0] <== in[i];`); + lines.push(`\t\tlt[${lt_i + 1}][i].in[1] <== ${max};`); + + lines.push(`\t\tand[${and_i}][i] = AND();`); + lines.push(`\t\tand[${and_i}][i].a <== lt[${lt_i}][i].out;`); + lines.push(`\t\tand[${and_i}][i].b <== lt[${lt_i + 1}][i].out;`); + + eq_outputs.push(["and", and_i]); + range_checks[min][max] = [lt_i, and_i]; + lt_i += 2; + and_i += 1; + } else { + let [_, and_i] = range_checks[min][max]; + eq_outputs.push(["and", and_i]); + } + } + for (let code of vals) { + if (eq_checks[code] === undefined) { + lines.push(`\t\teq[${eq_i}][i] = IsEqual();`); + lines.push(`\t\teq[${eq_i}][i].in[0] <== in[i];`); + lines.push(`\t\teq[${eq_i}][i].in[1] <== ${code};`); + eq_outputs.push(["eq", eq_i]); + eq_checks[code] = eq_i; + eq_i += 1; + } else { + eq_outputs.push(["eq", eq_checks[code]]); + } + } + + lines.push(`\t\tand[${and_i}][i] = AND();`); + lines.push(`\t\tand[${and_i}][i].a <== states[i][${prev_i}];`); + if (eq_outputs.length === 1) { + // if (is_negate) { + // lines.push(`\t\tand[${and_i}][i].b <== 1 - ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`); + // } else { + // lines.push(`\t\tand[${and_i}][i].b <== ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`); + // } + lines.push( + `\t\tand[${and_i}][i].b <== ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;` + ); + } else if (eq_outputs.length > 1) { + const eq_outputs_key: string = JSON.stringify(eq_outputs); + if (multi_or_checks1[eq_outputs_key] === undefined) { + lines.push( + `\t\tmulti_or[${multi_or_i}][i] = MultiOR(${eq_outputs.length});` + ); + for (let output_i = 0; output_i < eq_outputs.length; output_i++) { + lines.push( + `\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== ${eq_outputs[output_i][0]}[${eq_outputs[output_i][1]}][i].out;` + ); + } + // if (is_negate) { + // lines.push(`\t\tand[${and_i}][i].b <== 1 - multi_or[${multi_or_i}][i].out;`); + // } else { + // lines.push(`\t\tand[${and_i}][i].b <== multi_or[${multi_or_i}][i].out;`); + // } + lines.push( + `\t\tand[${and_i}][i].b <== multi_or[${multi_or_i}][i].out;` + ); + multi_or_checks1[eq_outputs_key] = multi_or_i; + multi_or_i += 1; + } else { + lines.push( + `\t\tand[${and_i}][i].b <== multi_or[${multi_or_checks1[eq_outputs_key]}][i].out;` + ); } + } - lines.push(`\t\tstate_changed[i].in[${i - 1}] <== states[i+1][${i}];`); + outputs.push(and_i); + and_i += 1; } - lines.push(`\t\tstates[i+1][0] <== 1 - state_changed[i].out;`); - lines.push(`\t}`); - - - const declarations: string[] = []; - declarations.push(`pragma circom 2.1.5;\n`); - declarations.push(`include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";\n`); - // declarations.push(`pragma circom 2.1.5;\ninclude "@zk-email/circuits/regexes/regex_helpers.circom";\n`); - declarations.push(`template ${template_name}(msg_bytes) {`); - declarations.push(`\tsignal input msg[msg_bytes];`); - declarations.push(`\tsignal output out;\n`); - declarations.push(`\tvar num_bytes = msg_bytes+1;`); - declarations.push(`\tsignal in[num_bytes];`); - declarations.push(`\tin[0]<==255;`); - declarations.push(`\tfor (var i = 0; i < msg_bytes; i++) {`); - declarations.push(`\t\tin[i+1] <== msg[i];`); - declarations.push(`\t}\n`); - if (eq_i > 0) { - declarations.push(`\tcomponent eq[${eq_i}][num_bytes];`); - } - if (lt_i > 0) { - declarations.push(`\tcomponent lt[${lt_i}][num_bytes];`); - } - if (and_i > 0) { - declarations.push(`\tcomponent and[${and_i}][num_bytes];`); - } - if (multi_or_i > 0) { - declarations.push(`\tcomponent multi_or[${multi_or_i}][num_bytes];`); + if (outputs.length === 1) { + lines.push(`\t\tstates[i+1][${i}] <== and[${outputs[0]}][i].out;`); + } else if (outputs.length > 1) { + const outputs_key: string = JSON.stringify(outputs); + if (multi_or_checks2[outputs_key] === undefined) { + lines.push( + `\t\tmulti_or[${multi_or_i}][i] = MultiOR(${outputs.length});` + ); + for (let output_i = 0; output_i < outputs.length; output_i++) { + lines.push( + `\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== and[${outputs[output_i]}][i].out;` + ); + } + lines.push(`\t\tstates[i+1][${i}] <== multi_or[${multi_or_i}][i].out;`); + multi_or_checks2[outputs_key] = multi_or_i; + multi_or_i += 1; + } else { + lines.push( + `\t\tstates[i+1][${i}] <== multi_or[${multi_or_checks2[outputs_key]}][i].out;` + ); + } } - declarations.push(`\tsignal states[num_bytes+1][${N}];`); - declarations.push(`\tcomponent state_changed[num_bytes];`); - declarations.push(""); - - const init_code: string[] = []; - init_code.push(`\tstates[0][0] <== 1;`); - init_code.push(`\tfor (var i = 1; i < ${N}; i++) {`); - init_code.push(`\t\tstates[0][i] <== 0;`); - init_code.push("\t}"); - init_code.push(""); - - lines = declarations.concat(init_code).concat(lines); - - const accept_node: number = accept_nodes_array[0]; - const accept_lines = [""]; - accept_lines.push("\tcomponent final_state_result = MultiOR(num_bytes+1);"); - accept_lines.push("\tfor (var i = 0; i <= num_bytes; i++) {"); - accept_lines.push(`\t\tfinal_state_result.in[i] <== states[i][${accept_node}];`); - accept_lines.push("\t}"); - accept_lines.push("\tout <== final_state_result.out;"); - lines = lines.concat(accept_lines); - let string: string = lines.reduce((res, line) => res + line + "\n", ""); - return string; + lines.push(`\t\tstate_changed[i].in[${i - 1}] <== states[i+1][${i}];`); + } + + lines.push(`\t\tstates[i+1][0] <== 1 - state_changed[i].out;`); + lines.push("\t}"); + + const declarations: string[] = []; + declarations.push(`pragma circom 2.1.5;\n`); + declarations.push( + `include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";\n` + ); + // declarations.push(`pragma circom 2.1.5;\ninclude "@zk-email/circuits/regexes/regex_helpers.circom";\n`); + declarations.push(`// regex: ${regex_str.replace(/\n/g, "\\n")}`); + declarations.push(`template ${template_name}(msg_bytes) {`); + declarations.push(`\tsignal input msg[msg_bytes];`); + declarations.push(`\tsignal output out;\n`); + declarations.push(`\tvar num_bytes = msg_bytes+1;`); + declarations.push(`\tsignal in[num_bytes];`); + declarations.push(`\tin[0]<==255;`); + declarations.push(`\tfor (var i = 0; i < msg_bytes; i++) {`); + declarations.push(`\t\tin[i+1] <== msg[i];`); + declarations.push(`\t}\n`); + if (eq_i > 0) { + declarations.push(`\tcomponent eq[${eq_i}][num_bytes];`); + } + if (lt_i > 0) { + declarations.push(`\tcomponent lt[${lt_i}][num_bytes];`); + } + if (and_i > 0) { + declarations.push(`\tcomponent and[${and_i}][num_bytes];`); + } + if (multi_or_i > 0) { + declarations.push(`\tcomponent multi_or[${multi_or_i}][num_bytes];`); + } + declarations.push(`\tsignal states[num_bytes+1][${N}];`); + declarations.push(`\tcomponent state_changed[num_bytes];`); + declarations.push(""); + + const init_code: string[] = []; + init_code.push(`\tstates[0][0] <== 1;`); + init_code.push(`\tfor (var i = 1; i < ${N}; i++) {`); + init_code.push(`\t\tstates[0][i] <== 0;`); + init_code.push("\t}"); + init_code.push(""); + + lines = declarations.concat(init_code).concat(lines); + + const accept_node: number = accept_nodes_array[0]; + const accept_lines = [""]; + accept_lines.push("\tcomponent final_state_result = MultiOR(num_bytes+1);"); + accept_lines.push("\tfor (var i = 0; i <= num_bytes; i++) {"); + accept_lines.push( + `\t\tfinal_state_result.in[i] <== states[i][${accept_node}];` + ); + accept_lines.push("\t}"); + accept_lines.push("\tout <== final_state_result.out;"); + + lines = lines.concat(accept_lines); + let string: string = lines.reduce((res, line) => res + line + "\n", ""); + return string; } // Commented these two out as they're only used by the code that's also commented out diff --git a/packages/compiler/src/js_caller.rs b/packages/compiler/src/js_caller.rs index 9ebe048..e3d8f87 100644 --- a/packages/compiler/src/js_caller.rs +++ b/packages/compiler/src/js_caller.rs @@ -47,9 +47,9 @@ pub fn regex_to_dfa(regex: &str) -> Result, JsCallerError> { Ok(serde_json::from_str(&result)?) } -pub fn gen_circom_allstr(graph: &[Value], template_name: &str) -> Result { +pub fn gen_circom_allstr(graph: &[Value], template_name: &str, regex_str: &str) -> Result { let code: &'static str = include_str!("gen_circom.js"); let mut script = Script::from_string(code)?; - let result: String = script.call("genCircomAllstr", (graph, template_name))?; + let result: String = script.call("genCircomAllstr", (graph, template_name, regex_str))?; Ok(result) } diff --git a/packages/compiler/src/lib.rs b/packages/compiler/src/lib.rs index dcb882c..6cfc0ac 100644 --- a/packages/compiler/src/lib.rs +++ b/packages/compiler/src/lib.rs @@ -6,8 +6,8 @@ pub mod js_caller; pub mod node; -#[cfg(test)] -mod tests; +// #[cfg(test)] +// mod tests; use crate::node::*; use neon; @@ -68,7 +68,8 @@ pub enum SoldityType { #[derive(Debug, Clone)] pub struct RegexAndDFA { // pub max_byte_size: usize, - // pub all_regex: String, + // Original regex string, only here to be printed in generated file to make it more reproducible + pub regex_str: String, pub dfa_val: Vec, pub substrs_defs: SubstrsDefs, } @@ -97,7 +98,7 @@ impl DecomposedRegexConfig { let substrs_defs = self.extract_substr_ids(&dfa_val)?; Ok(RegexAndDFA { // max_byte_size: self.max_byte_size, - // all_regex, + regex_str: all_regex, dfa_val, substrs_defs, }) @@ -295,8 +296,8 @@ impl DecomposedRegexConfig { let index_ends = part_regexes .iter() .map(|regex| { - // println!("regex {}", regex); - // println!("concat_str {}", concat_str); + println!("regex {}", regex); + println!("concat_str {}", concat_str); let found = regex.find(&concat_str).unwrap().unwrap(); // println!("found {:?}", found); if found.start() == found.end() { @@ -343,7 +344,7 @@ impl RegexAndDFA { Ok(RegexAndDFA { // max_byte_size, - // all_regex: regex_str.to_string(), + regex_str: regex_str.to_string(), dfa_val, substrs_defs, }) @@ -533,3 +534,26 @@ fn main(mut cx: neon::prelude::ModuleContext) -> neon::prelude::NeonResult<()> { cx.export_function("genFromRaw", gen_from_raw_node)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use std::path::Path; + + #[test] + fn test_gen_from_decomposed() { + let decomposed_regex_path = Path::new("../circuits/common/subject_all.json"); + let circom_file_path = Some("../circuits/common/subject_all_regex.circom"); + let circom_template_name = Some("SubjectAllRegex"); + let gen_substrs = Some(true); + + let result = gen_from_decomposed( + decomposed_regex_path.to_str().unwrap(), + circom_file_path.map(|s| s), + circom_template_name.map(|s| s), + gen_substrs, + ); + + // assert!(result.is_ok()); + } +} diff --git a/packages/compiler/src/node.rs b/packages/compiler/src/node.rs index a7ceaa6..0932771 100644 --- a/packages/compiler/src/node.rs +++ b/packages/compiler/src/node.rs @@ -1,46 +1,50 @@ use crate::{gen_from_decomposed, gen_from_raw}; +use neon::context::Context; use neon::prelude::*; pub(crate) fn gen_from_decomposed_node(mut cx: FunctionContext) -> JsResult { + println!("Starting gen_from_decomposed_node function"); let decomposed_regex_path = cx.argument::(0)?.value(&mut cx); + println!("Decomposed regex path: {}", decomposed_regex_path); let obj = cx.argument::(1)?; + println!("Object: {:?}", obj); - // let halo2_dir_path = obj - // .get_opt::(&mut cx, "halo2DirPath")? - // .map(|v| { - // v.to_string(&mut cx) - // .expect("halo2DirPath must be null or string") - // .value(&mut cx) - // }); let circom_file_path = obj .get_opt::(&mut cx, "circomFilePath")? .map(|v| { - v.to_string(&mut cx) + let path = v.to_string(&mut cx) .expect("circomFilePath must be null or string") - .value(&mut cx) + .value(&mut cx); + println!("Circom file path: {}", path); + path }); let circom_template_name = obj .get_opt::(&mut cx, "templateName")? .map(|v| { - v.to_string(&mut cx) + let name = v.to_string(&mut cx) .expect("templateName must be null or string") - .value(&mut cx) + .value(&mut cx); + println!("Circom template name: {}", name); + name }); let gen_substrs = obj .get_opt::(&mut cx, "genSubstrs")? .map(|v| { - v.as_value(&mut cx) + let gen = v.as_value(&mut cx) .downcast::(&mut cx) .expect("genSubstrs must be null or boolean") - .value(&mut cx) + .value(&mut cx); + println!("Gen substrs: {}", gen); + gen }); + println!("Calling gen_from_decomposed function"); gen_from_decomposed( &decomposed_regex_path, - // halo2_dir_path.as_ref().map(|s| s.as_str()), circom_file_path.as_ref().map(|s| s.as_str()), circom_template_name.as_ref().map(|s| s.as_str()), gen_substrs, ); + println!("Finished gen_from_decomposed_node function"); Ok(cx.null()) }