diff --git a/certs/4096/client-cert.pem b/certs/4096/client-cert.pem new file mode 100644 index 000000000..57e303920 --- /dev/null +++ b/certs/4096/client-cert.pem @@ -0,0 +1,130 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: + 2f:0f:ab:23:bc:a3:14:07:91:06:55:35:01:63:7f:42:bd:fb:f2:43 + Signature Algorithm: sha256WithRSAEncryption + Issuer: C = US, ST = Montana, L = Bozeman, O = wolfSSL_4096, OU = Programming-4096, CN = www.wolfssl.com, emailAddress = info@wolfssl.com + Validity + Not Before: Jul 9 03:06:02 2019 GMT + Not After : Apr 4 03:06:02 2022 GMT + Subject: C = US, ST = Montana, L = Bozeman, O = wolfSSL_4096, OU = Programming-4096, CN = www.wolfssl.com, emailAddress = info@wolfssl.com + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + RSA Public-Key: (4096 bit) + Modulus: + 00:f5:d0:31:e4:71:59:58:b3:07:50:dd:16:79:fc: + c6:95:50:fc:46:0e:57:12:86:71:8d:e3:9b:4a:33: + ea:4f:d9:17:13:6d:48:69:df:59:11:08:02:9d:af: + 2b:c7:30:be:0c:dc:87:d4:5a:12:09:23:5d:e1:76: + 5a:62:37:46:74:ef:03:05:bb:1e:6d:29:75:6c:2e: + 9d:87:0d:8f:87:cb:14:95:9b:be:17:6b:51:d1:4c: + da:d7:91:66:c5:36:eb:e0:07:1a:76:4d:b0:fb:c1: + f5:5e:05:db:ba:cb:25:d9:99:13:1c:c0:35:dc:40: + e9:36:cd:c4:d5:7a:41:70:0f:36:eb:a5:4e:17:05: + d5:75:1b:64:62:7a:3f:0d:28:48:6a:e3:ac:9c:a8: + 8f:e9:ed:f7:cd:24:a0:b1:a0:03:ac:e3:03:f5:3f: + d1:96:ff:2a:7e:08:b1:d3:e0:18:14:ec:65:37:50: + 43:c2:6a:8c:f4:5b:fe:c4:cb:8d:3f:81:02:f7:c2: + dd:e4:c1:8e:80:0c:04:25:2d:80:5a:2e:0f:22:35: + 4a:f4:85:ed:51:d8:ab:6d:8f:a2:3b:24:00:6e:81: + e2:1e:76:d6:ac:31:12:db:f3:8e:07:a1:de:89:4a: + 39:60:77:c5:aa:f1:51:e6:06:f1:95:56:2a:e1:8e: + 92:30:9f:fe:58:44:ac:46:f2:fd:9a:fc:a8:1d:a1: + d3:55:37:4a:8b:fc:9c:33:f8:a7:61:48:41:7c:9c: + 77:3f:f5:80:23:7d:43:b4:d5:88:0a:c9:75:d7:44: + 19:4d:77:6c:0b:0a:49:aa:1c:2f:d6:5a:44:a6:47: + 4d:e5:36:96:40:99:2c:56:26:b1:f2:92:31:59:d7: + 2c:d4:b4:21:d6:65:13:0b:3e:fb:ff:04:eb:b9:85: + b9:d8:d8:28:4f:5c:17:96:a3:51:be:fe:7d:0b:1b: + 
48:40:25:76:94:dc:41:fb:bf:73:76:da:eb:b3:62: + e7:c1:c8:54:6a:93:e1:8d:31:e8:3e:3e:df:bc:87: + 02:30:22:57:c4:e0:18:7a:d3:ae:e4:02:9b:aa:bd: + 4e:49:47:72:e9:8d:13:2d:54:9b:00:a7:91:61:71: + c9:cc:48:4f:ee:df:5e:1b:1a:df:67:d3:20:e6:44: + 45:98:7e:e7:0e:63:16:83:c9:26:5d:90:c1:e5:2a: + 5c:45:54:13:b2:81:18:06:20:2e:2e:66:5a:b5:7b: + 6e:d6:0c:4e:89:01:56:70:bb:ae:de:e9:99:5e:d1: + b9:3a:b7:6c:17:b6:03:a9:08:dd:9c:f4:14:c9:c9: + 59:39:72:d4:7e:02:37:31:cd:0e:a7:3d:f8:f2:cf: + 6b:15:ab + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Key Identifier: + FA:54:89:67:E5:5F:B7:31:40:EA:FD:E7:F6:A3:C6:5A:56:16:A5:6E + X509v3 Authority Key Identifier: + keyid:FA:54:89:67:E5:5F:B7:31:40:EA:FD:E7:F6:A3:C6:5A:56:16:A5:6E + DirName:/C=US/ST=Montana/L=Bozeman/O=wolfSSL_4096/OU=Programming-4096/CN=www.wolfssl.com/emailAddress=info@wolfssl.com + serial:2F:0F:AB:23:BC:A3:14:07:91:06:55:35:01:63:7F:42:BD:FB:F2:43 + + X509v3 Basic Constraints: + CA:TRUE + Signature Algorithm: sha256WithRSAEncryption + 57:0d:97:98:78:bf:2a:31:9a:39:41:38:33:46:d5:50:47:e8: + 19:62:a8:36:1e:b7:fd:d1:bc:50:5c:3a:eb:96:1a:9b:43:b0: + 67:5d:f4:51:77:87:33:0b:90:6f:e8:d3:82:4d:1a:aa:93:5f: + 7d:78:b1:e0:7b:ee:88:01:e7:b3:fa:7e:0b:76:9c:9e:81:36: + e4:a3:c1:41:62:a4:0a:7e:24:d0:ab:9f:ba:d8:1e:38:ad:f1: + 12:52:0d:f2:96:8a:0b:25:a2:49:3f:88:5b:ea:23:87:26:22: + 7a:b9:60:6b:d6:7a:88:37:ac:64:9b:18:51:07:ea:df:00:96: + 70:95:88:9d:8f:af:be:3c:4e:c7:5e:55:15:3d:1f:e4:2d:dc: + c9:a3:ae:af:fa:44:a8:e2:f4:df:8e:cd:f9:10:7f:8b:86:cc: + 6d:45:91:91:4f:e3:d0:a7:d2:d9:8e:09:c6:f8:eb:e7:bd:17: + 19:d6:e7:1a:b8:ca:4d:ec:34:07:7d:2d:e8:23:9d:82:e9:f7: + 47:03:ab:5f:7c:f5:41:6f:70:11:cb:24:d8:23:c2:65:31:b7: + 0b:8f:0a:26:5b:0f:f6:9b:11:7f:9a:8d:94:6d:5a:9c:5e:73: + 35:15:7b:e3:09:e8:08:d0:3f:b4:e5:29:2c:f6:3e:71:6e:f4: + 1b:20:55:34:40:2f:b0:9b:dd:f1:dc:bf:17:1d:a7:2d:85:01: + d6:d2:b2:56:56:98:33:85:ed:f6:a3:f6:3e:7b:f4:03:a4:58: + 8e:c5:5b:ab:66:e8:0f:34:17:2d:33:36:71:0c:b8:d9:78:e7: + 
06:fc:da:4f:a1:fa:db:74:ce:ea:85:27:f9:75:a9:ad:50:86: + 6e:ea:01:01:19:0d:28:4a:ed:06:be:65:70:b2:06:46:2e:16: + 57:df:55:c7:8e:cd:5b:ad:66:28:b8:74:87:bf:c4:c7:08:3f: + 37:a3:23:84:9f:4e:e8:48:6c:8d:54:9f:fb:e0:fb:53:a3:41: + e1:68:8a:94:c9:f5:ee:3e:15:46:d2:62:33:86:86:06:34:b4: + e4:2f:da:28:2e:2f:c0:bd:75:e8:2c:3f:e2:a5:43:7d:02:eb: + 25:b9:ef:87:8a:d7:57:61:16:e8:9e:83:65:f9:10:f4:5e:5f: + 1c:7a:25:d6:47:bd:29:c5:4f:8b:b9:6a:48:7a:9b:1e:6d:77: + 8e:72:6c:0c:07:fe:4c:c5:cf:55:0e:cb:4b:ad:16:e1:e2:54: + b8:9d:34:03:d1:8d:b7:37:9b:e3:5a:32:60:03:7f:61:0f:50: + 0b:72:54:8b:0d:c7:97:7e:bb:9b:b2:f7:73:47:71:7b:78:65: + 36:df:57:72:9e:42:9c:8a +-----BEGIN CERTIFICATE----- +MIIG4DCCBMigAwIBAgIULw+rI7yjFAeRBlU1AWN/Qr378kMwDQYJKoZIhvcNAQEL +BQAwgZ4xCzAJBgNVBAYTAlVTMRAwDgYDVQQIDAdNb250YW5hMRAwDgYDVQQHDAdC +b3plbWFuMRUwEwYDVQQKDAx3b2xmU1NMXzQwOTYxGTAXBgNVBAsMEFByb2dyYW1t +aW5nLTQwOTYxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqGSIb3DQEJ +ARYQaW5mb0B3b2xmc3NsLmNvbTAeFw0xOTA3MDkwMzA2MDJaFw0yMjA0MDQwMzA2 +MDJaMIGeMQswCQYDVQQGEwJVUzEQMA4GA1UECAwHTW9udGFuYTEQMA4GA1UEBwwH +Qm96ZW1hbjEVMBMGA1UECgwMd29sZlNTTF80MDk2MRkwFwYDVQQLDBBQcm9ncmFt +bWluZy00MDk2MRgwFgYDVQQDDA93d3cud29sZnNzbC5jb20xHzAdBgkqhkiG9w0B +CQEWEGluZm9Ad29sZnNzbC5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK +AoICAQD10DHkcVlYswdQ3RZ5/MaVUPxGDlcShnGN45tKM+pP2RcTbUhp31kRCAKd +ryvHML4M3IfUWhIJI13hdlpiN0Z07wMFux5tKXVsLp2HDY+HyxSVm74Xa1HRTNrX +kWbFNuvgBxp2TbD7wfVeBdu6yyXZmRMcwDXcQOk2zcTVekFwDzbrpU4XBdV1G2Ri +ej8NKEhq46ycqI/p7ffNJKCxoAOs4wP1P9GW/yp+CLHT4BgU7GU3UEPCaoz0W/7E +y40/gQL3wt3kwY6ADAQlLYBaLg8iNUr0he1R2Kttj6I7JABugeIedtasMRLb844H +od6JSjlgd8Wq8VHmBvGVVirhjpIwn/5YRKxG8v2a/KgdodNVN0qL/Jwz+KdhSEF8 +nHc/9YAjfUO01YgKyXXXRBlNd2wLCkmqHC/WWkSmR03lNpZAmSxWJrHykjFZ1yzU +tCHWZRMLPvv/BOu5hbnY2ChPXBeWo1G+/n0LG0hAJXaU3EH7v3N22uuzYufByFRq +k+GNMeg+Pt+8hwIwIlfE4Bh6067kApuqvU5JR3LpjRMtVJsAp5FhccnMSE/u314b +Gt9n0yDmREWYfucOYxaDySZdkMHlKlxFVBOygRgGIC4uZlq1e27WDE6JAVZwu67e +6Zle0bk6t2wXtgOpCN2c9BTJyVk5ctR+AjcxzQ6nPfjyz2sVqwIDAQABo4IBEjCC 
+AQ4wHQYDVR0OBBYEFPpUiWflX7cxQOr95/ajxlpWFqVuMIHeBgNVHSMEgdYwgdOA +FPpUiWflX7cxQOr95/ajxlpWFqVuoYGkpIGhMIGeMQswCQYDVQQGEwJVUzEQMA4G +A1UECAwHTW9udGFuYTEQMA4GA1UEBwwHQm96ZW1hbjEVMBMGA1UECgwMd29sZlNT +TF80MDk2MRkwFwYDVQQLDBBQcm9ncmFtbWluZy00MDk2MRgwFgYDVQQDDA93d3cu +d29sZnNzbC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb22CFC8P +qyO8oxQHkQZVNQFjf0K9+/JDMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQAD +ggIBAFcNl5h4vyoxmjlBODNG1VBH6BliqDYet/3RvFBcOuuWGptDsGdd9FF3hzML +kG/o04JNGqqTX314seB77ogB57P6fgt2nJ6BNuSjwUFipAp+JNCrn7rYHjit8RJS +DfKWigslokk/iFvqI4cmInq5YGvWeog3rGSbGFEH6t8AlnCViJ2Pr748TsdeVRU9 +H+Qt3Mmjrq/6RKji9N+OzfkQf4uGzG1FkZFP49Cn0tmOCcb46+e9FxnW5xq4yk3s +NAd9LegjnYLp90cDq1989UFvcBHLJNgjwmUxtwuPCiZbD/abEX+ajZRtWpxeczUV +e+MJ6AjQP7TlKSz2PnFu9BsgVTRAL7Cb3fHcvxcdpy2FAdbSslZWmDOF7faj9j57 +9AOkWI7FW6tm6A80Fy0zNnEMuNl45wb82k+h+tt0zuqFJ/l1qa1Qhm7qAQEZDShK +7Qa+ZXCyBkYuFlffVceOzVutZii4dIe/xMcIPzejI4SfTuhIbI1Un/vg+1OjQeFo +ipTJ9e4+FUbSYjOGhgY0tOQv2iguL8C9degsP+KlQ30C6yW574eK11dhFuieg2X5 +EPReXxx6JdZHvSnFT4u5akh6mx5td45ybAwH/kzFz1UOy0utFuHiVLidNAPRjbc3 +m+NaMmADf2EPUAtyVIsNx5d+u5uy93NHcXt4ZTbfV3KeQpyK +-----END CERTIFICATE----- diff --git a/certs/4096/client-key.pem b/certs/4096/client-key.pem new file mode 100644 index 000000000..cbe8090b4 --- /dev/null +++ b/certs/4096/client-key.pem @@ -0,0 +1,51 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIJKAIBAAKCAgEA9dAx5HFZWLMHUN0WefzGlVD8Rg5XEoZxjeObSjPqT9kXE21I +ad9ZEQgCna8rxzC+DNyH1FoSCSNd4XZaYjdGdO8DBbsebSl1bC6dhw2Ph8sUlZu+ +F2tR0Uza15FmxTbr4Acadk2w+8H1XgXbussl2ZkTHMA13EDpNs3E1XpBcA8266VO +FwXVdRtkYno/DShIauOsnKiP6e33zSSgsaADrOMD9T/Rlv8qfgix0+AYFOxlN1BD +wmqM9Fv+xMuNP4EC98Ld5MGOgAwEJS2AWi4PIjVK9IXtUdirbY+iOyQAboHiHnbW +rDES2/OOB6HeiUo5YHfFqvFR5gbxlVYq4Y6SMJ/+WESsRvL9mvyoHaHTVTdKi/yc +M/inYUhBfJx3P/WAI31DtNWICsl110QZTXdsCwpJqhwv1lpEpkdN5TaWQJksViax +8pIxWdcs1LQh1mUTCz77/wTruYW52NgoT1wXlqNRvv59CxtIQCV2lNxB+79zdtrr +s2LnwchUapPhjTHoPj7fvIcCMCJXxOAYetOu5AKbqr1OSUdy6Y0TLVSbAKeRYXHJ 
+zEhP7t9eGxrfZ9Mg5kRFmH7nDmMWg8kmXZDB5SpcRVQTsoEYBiAuLmZatXtu1gxO +iQFWcLuu3umZXtG5OrdsF7YDqQjdnPQUyclZOXLUfgI3Mc0Opz348s9rFasCAwEA +AQKCAgEAxXZXffFoGo7GY7kWoyvhwnTqEsTWQXVqptaeGn+VzErR9LMnJpVakQnk +QBNFkZ+gK+jD3Fv2fQzCD6npdVh96tVNkj78dCiHwT25IZJNKIKEqKIRk/KMKRwZ ++G0/J1G1LaPHKB3E/JiUqND/8A/c+e2zorbtDV+/eFzXr72j74bpUWbbUjdHf+lf +PJSDLeicM/Fs6fOml/6nv02bINUv3qQGu+5mSWv1EIWfhFpSPgygSkzaAcViMbHs ++N2jO85BOhJ5+ZdbB5WfhtYEc2zoj0xMSB2FxOfO3hYx9lw3VI5VvK8uR+isA7Ck ++ZCYmaTcbpgIXAe7CJOvYY10qPjEiWQQ4ebAzR05INZaiYP8N+ISZqgSzHK7Hvtq +43xxfrkujoRm4bnQJZpvnRnmfujY8MUjFppoLB1Vro6Q7o7sXkadYFIyFyhZxEkq +ID6Vxd/2PffFz7HCyXb4Pb70Y/wqAG+ZpratNe7exeCXxnPuM6Co/EyP8oxh+wMZ +oegXTuMhWM7+8l+73U/3GMs1V93lUCp7GukS8noRsUO5cAcMj2m55aXJ4huWdBH1 +lblYwL03+ygqvYSxK2dCgsOVVUXV6sOKQjpDF17N0ur832fs4WyoAxmyHUpfT+fT +4IbFGhDDCNLthZMIUQWmNxUyvWxzYwFdW09q3G0dVZEh5I638IECggEBAP0nyP52 +XIkyy4oih2FIkUoFraRcispcAoh+UcVmkCyj7adDGQuiQrTg4EW//qDydQuOfZ1z +Z9MQCcXZjK06ZHKtljWRD0vJvU9lR6Yt6z/imXJmEu3r0nz/OiA3KtNlUZvDqhix +H26dQEekH4Kb21Brhi/7PzG5gREEFGOGT0Aq9fl8oXgZE9BRUQ95iI0Uo962MylC +uehZdvdDGram3wrBQsc/HH5cLJFLHvhGkR/uVrMOyNAx0z3tPdnFMAxY2Le17BSs +QWRt5MZZ/RQFYGXYxIREfhu0pBZ1wSeWshnWOVTAk/PXH80b3/gSiBSfmAVHRnGB +bN+R71PjxbGJL+ECggEBAPiTSih3lO/pxArD6FJZth2NzhTnQ8btCSdd844Iahlr +LJebiFMr2v5LlGaE1anOpUNw+wFab8330Z1R7qDcRvV9p+6ghreD/yGLdgV93sQm +Nry0ikjDBpCX5aY4w+Z80Pgj0jMfgcPjfYVaOBAD5ojbyEzQ97JNJzOFzTp0g2uC +WNnf7vXT6f4c7wYSFtFMrlRLDRq94s9Ws3S+RE+kcwqYjWGEOEbclc8/a+dlhwK/ +S1fiPcQrHIIdzBN/wAYSjG+XUHuMgcMjFetwB46hBx5Z+hDKfg/iu+6GJh5VuZhm +hewnxdljjVF3qqA2VTMQIV7sR2dx0a/8PlD1vtaS5wsCggEAIXyKxMYpVWinrd0F +ZWPw/AamQnCPV1c2apGzBVacyZrhi9d/T5+mDUEVyYQtDWMlAmNV0Gb8m9mqQUaW +qi9oLBc0IF/Q0yibZw4xnRTD4o55170S0e/4xtoH+Uzy2EW1ttH6BQwg6UPZxeA6 +3s75ArlGZcBpSo2MOhD9FXEluIo2QUswHK/MhCjNfSuJWYgaaRJW0CVobAixiOGS +fgiyxjxsNejuPvS4XHvAW/0Ro1SmmUbiX0/H7pAcN1szEN8Lw7lHwjBK8hrrQSWU +KXrQlohG7mwU9ls9vU7UPwVbB7njmYdjysRxC3Ode7YP1BKMTF5yPf9txGEMdF9T +vjk0YQKCAQBf8vKwFiCOTsyWXzKA/xH17HO8y9v0oDBlWrWVgJf7wcvPpYCEoiwA 
+9omM3P9gcVyHYMfyqMb5WQw3TpXuz7gwMFWvHZWCptfHSf6/deuUCTAdvQ6XsXgK +Piet9sFfaZR8A8+yXhoH0/ryi3WScP7+mt+BDzRdRby4/Y/PXYQQ7pp/Vxn1F9x9 +cwusazUViyTLcsDXLq6q28ufZ4YUu+SQFXyVRKU4bRMCkXeENUNdAxwBC1pOK1nw +u7G3YRts/KHqvRya5Ax+lz9xxqeUHYIS7CZDbvYkCaADHRL/qJVgR0qwclXDaNL2 +vFtHRlGyySooasnRGzUWWiZvt7v3NXMrAoIBAFa62ALXSzBeGx4v8w288QVqaErh +6rPeYYyJRLpjXt8FJDJxZRo2L7wHdaPOnlKSlU0/yQa8oRQzN5WrmusE9hXDmxBW +U6Io8mjafZdSY6ybVqmrLh6eAXD/K20MS6bDOrPRp0teSS6V1mquWBNmjy+T5G6L ++pQwPuyWq0YgPsUwtOtBADlgHeEgzjFwFznLdlZsVXuQILw5slvRKG8MT0VrgsRX +Iww/Py2Dsz2O+Rrad1Qu/hYuupndyrPR2LuH4dCp1OaP6AA+SYrdpjKRADExIZgY +lMktJwW3mwkuu12/Z+gO0UR1gB0KIY+VdrD8GTz/kuoBRYnRTv5NK0sY5s4= +-----END RSA PRIVATE KEY----- diff --git a/certs/4096/include.am b/certs/4096/include.am new file mode 100644 index 000000000..0c8305299 --- /dev/null +++ b/certs/4096/include.am @@ -0,0 +1,9 @@ +# vim:ft=automake +# All paths should be given relative to the root +# + +EXTRA_DIST += \ + certs/4096/client-cert.pem \ + certs/4096/client-key.pem + + diff --git a/certs/include.am b/certs/include.am index a3db1baca..98c31a107 100644 --- a/certs/include.am +++ b/certs/include.am @@ -99,6 +99,7 @@ dist_doc_DATA+= certs/taoCert.txt EXTRA_DIST+= certs/ntru-key.raw include certs/1024/include.am +include certs/4096/include.am include certs/crl/include.am include certs/ecc/include.am include certs/ed25519/include.am diff --git a/certs/renewcerts.sh b/certs/renewcerts.sh index f34e07edd..662bc4b3e 100755 --- a/certs/renewcerts.sh +++ b/certs/renewcerts.sh @@ -166,6 +166,25 @@ run_renewcerts(){ echo "End of section" echo "---------------------------------------------------------------------" ############################################################ + #### update the self-signed (4096-bit) client-cert.pem ##### + ############################################################ + echo "Updating 4096-bit client-cert.pem" + echo "" + #pipe the following arguments to openssl req... 
+ echo -e "US\\nMontana\\nBozeman\\nwolfSSL_4096\\nProgramming-4096\\nwww.wolfssl.com\\ninfo@wolfssl.com\\n.\\n.\\n" | openssl req -new -key ./4096/client-key.pem -config ./wolfssl.cnf -nodes -out ./4096/client-cert.csr + check_result $? "Step 1" + + + openssl x509 -req -in ./4096/client-cert.csr -days 1000 -extfile wolfssl.cnf -extensions wolfssl_opts -signkey ./4096/client-key.pem -out ./4096/client-cert.pem + check_result $? "Step 2" + rm ./4096/client-cert.csr + + openssl x509 -in ./4096/client-cert.pem -text > ./4096/tmp.pem + check_result $? "Step 3" + mv ./4096/tmp.pem ./4096/client-cert.pem + echo "End of section" + echo "---------------------------------------------------------------------" + ############################################################ ########## update the self-signed ca-cert.pem ############## ############################################################ echo "Updating ca-cert.pem" diff --git a/configure.ac b/configure.ac index 0d392a119..a0467de51 100644 --- a/configure.ac +++ b/configure.ac @@ -4082,6 +4082,11 @@ do AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_NO_2048" ;; + +4096) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_4096" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_4096" + ;; + *) AC_MSG_ERROR([Invalid choice of Single Precision length in bits [256, 2048, 3072]: $ENABLED_SP.]) break;; diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 054e5c3c2..0481ebd74 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -1244,6 +1244,10 @@ static int GeneratePublicDh(DhKey* key, byte* priv, word32 privSz, if (mp_count_bits(&key->p) == 3072) return sp_DhExp_3072(&key->g, priv, privSz, &key->p, pub, pubSz); #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) + return sp_DhExp_4096(&key->g, priv, privSz, &key->p, pub, pubSz); +#endif #endif #ifndef WOLFSSL_SP_MATH @@ -1477,6 +1481,14 @@ int wc_DhCheckPubKey_ex(DhKey* key, const byte* pub, word32 pubSz, } else #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 
4096) { + ret = sp_ModExp_4096(y, q, p, y); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif #endif { @@ -1756,6 +1768,14 @@ int wc_DhCheckKeyPair(DhKey* key, const byte* pub, word32 pubSz, } else #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) { + ret = sp_ModExp_4096(&key->g, privateKey, &key->p, checkKey); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif #endif { #ifndef WOLFSSL_SP_MATH @@ -1905,6 +1925,28 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, return ret; } #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->p) == 4096) { + if (mp_init(y) != MP_OKAY) + return MP_INIT_E; + + if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) + ret = MP_READ_E; + + if (ret == 0) + ret = sp_DhExp_4096(y, priv, privSz, &key->p, agree, agreeSz); + + mp_clear(y); + #ifdef WOLFSSL_SMALL_STACK + #ifndef WOLFSSL_SP_MATH + XFREE(z, key->heap, DYNAMIC_TYPE_DH); + XFREE(x, key->heap, DYNAMIC_TYPE_DH); + #endif + XFREE(y, key->heap, DYNAMIC_TYPE_DH); + #endif + return ret; + } +#endif #endif #ifndef WOLFSSL_SP_MATH diff --git a/wolfcrypt/src/integer.c b/wolfcrypt/src/integer.c index e94c36e11..d2d990367 100644 --- a/wolfcrypt/src/integer.c +++ b/wolfcrypt/src/integer.c @@ -86,6 +86,8 @@ WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); #ifdef __cplusplus } /* extern "C" */ #endif @@ -4632,6 +4634,11 @@ static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result) err = sp_ModExp_3072(b, &r, a, &y); else #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(a) == 4096) + err = sp_ModExp_4096(b, &r, a, &y); + else +#endif #endif err = mp_exptmod (b, &r, a, &y); if (err != MP_OKAY) diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 40baa26e9..8205d480b 100644 --- 
a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -629,6 +629,17 @@ int wc_CheckRsaKey(RsaKey* key) } else #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->n) == 4096) { + ret = sp_ModExp_4096(k, &key->e, &key->n, tmp); + /* Only run the private operation when the public one succeeded. */ + if (ret == 0) + ret = sp_ModExp_4096(tmp, &key->d, &key->n, tmp); + if (ret != 0) + ret = MP_EXPTMOD_E; + } + else +#endif #endif #ifdef WOLFSSL_SP_MATH { @@ -2063,6 +2074,31 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, } } #endif +#ifdef WOLFSSL_SP_4096 + if (mp_count_bits(&key->n) == 4096) { + switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + case RSA_PRIVATE_DECRYPT: + case RSA_PRIVATE_ENCRYPT: + #ifdef WC_RSA_BLINDING + if (rng == NULL) + return MISSING_RNG_E; + #endif + #ifndef RSA_LOW_MEM + return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q, + &key->dP, &key->dQ, &key->u, &key->n, + out, outLen); + #else + return sp_RsaPrivate_4096(in, inLen, &key->d, &key->p, &key->q, + NULL, NULL, NULL, &key->n, out, outLen); + #endif +#endif + case RSA_PUBLIC_ENCRYPT: + case RSA_PUBLIC_DECRYPT: + return sp_RsaPublic_4096(in, inLen, &key->e, &key->n, out, outLen); + } + } +#endif #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_SP_MATH @@ -3971,6 +4007,13 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) XFREE(buf, key->heap, DYNAMIC_TYPE_RSA); } + if (err == MP_OKAY && mp_cmp(&p, &q) < 0) { + err = mp_copy(&p, &tmp1); + if (err == MP_OKAY) + err = mp_copy(&q, &p); + if (err == MP_OKAY) + err = mp_copy(&tmp1, &q); + } /* Setup RsaKey buffers */ if (err == MP_OKAY) diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index de6f0fb77..c1c085286 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -1326,7 +1326,7 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 
16, r + 16, z2); } /* Square a and put result in r. (r = a * a) @@ -1353,7 +1353,7 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Sub b from a into a. (a -= b) @@ -1713,7 +1713,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Square a and put result in r. (r = a * a) @@ -1740,7 +1740,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Sub b from a into a. (a -= b) @@ -2356,7 +2356,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } /* Square a and put result in r. (r = a * a) @@ -2383,7 +2383,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -10639,7 +10639,7 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Square a and put result in r. 
(r = a * a) @@ -10666,7 +10666,7 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Sub b from a into a. (a -= b) @@ -11154,7 +11154,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Square a and put result in r. (r = a * a) @@ -11181,7 +11181,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Sub b from a into a. (a -= b) @@ -12053,7 +12053,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } /* Square a and put result in r. (r = a * a) @@ -12080,7 +12080,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -19828,6 +19828,52421 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Number of sp_digit words in r; words not filled from a are zeroed. + * a Byte array. + * n Number of bytes in array to read. 
+ */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Number of sp_digit words in r; words not filled from a are zeroed. + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. 
+ * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<128 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/ + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = r[i] >> b; b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, 
r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], 
#116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8, [%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, 
r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, [%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + "ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "adc 
%[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +/* Subtract b from a in place. (a -= b) + * + * Fully unrolled over byte offsets 0 to 508 of both operands: each group loads + * four words of a and four words of b, subtracts them with the borrow carried + * from the previous word, and stores the differences back into a. + * + * a A single precision integer and result. + * b A single precision integer. + * + * Returns 0 when the final subtract left no borrow; otherwise every bit of + * the returned register is set. + */ +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #0]\n\t" + "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #0]\n\t" + "str r3, [%[a], #4]\n\t" + "str r4, [%[a], #8]\n\t" + "str r5, [%[a], #12]\n\t" + /* The remaining groups repeat this pattern with sbcs so the borrow carries on. */ + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #16]\n\t" + "str r3, [%[a], #20]\n\t" + "str r4, [%[a], #24]\n\t" + "str r5, [%[a], #28]\n\t" + "ldr r2, [%[a], #32]\n\t" + "ldr r3, [%[a], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "ldr r8, [%[b], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #32]\n\t" + "str r3, [%[a], #36]\n\t" + "str r4, [%[a], #40]\n\t" + "str r5, [%[a], #44]\n\t" + "ldr r2, [%[a], #48]\n\t" + "ldr r3, [%[a], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "ldr r8, [%[b], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t"
+ "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #48]\n\t" + "str r3, [%[a], #52]\n\t" + "str r4, [%[a], #56]\n\t" + "str r5, [%[a], #60]\n\t" + "ldr r2, [%[a], #64]\n\t" + "ldr r3, [%[a], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "ldr r8, [%[b], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #64]\n\t" + "str r3, [%[a], #68]\n\t" + "str r4, [%[a], #72]\n\t" + "str r5, [%[a], #76]\n\t" + "ldr r2, [%[a], #80]\n\t" + "ldr r3, [%[a], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "ldr r8, [%[b], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #80]\n\t" + "str r3, [%[a], #84]\n\t" + "str r4, [%[a], #88]\n\t" + "str r5, [%[a], #92]\n\t" + "ldr r2, [%[a], #96]\n\t" + "ldr r3, [%[a], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "ldr r8, [%[b], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #96]\n\t" + "str r3, [%[a], #100]\n\t" + "str r4, [%[a], #104]\n\t" + "str r5, [%[a], #108]\n\t" + "ldr r2, [%[a], #112]\n\t" + "ldr r3, [%[a], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "ldr r8, [%[b], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #112]\n\t" + "str r3, [%[a], #116]\n\t" + "str r4, [%[a], #120]\n\t" + "str r5, [%[a], #124]\n\t" + "ldr r2, [%[a], #128]\n\t" + "ldr r3, [%[a], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a],
#140]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "ldr r8, [%[b], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #128]\n\t" + "str r3, [%[a], #132]\n\t" + "str r4, [%[a], #136]\n\t" + "str r5, [%[a], #140]\n\t" + "ldr r2, [%[a], #144]\n\t" + "ldr r3, [%[a], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "ldr r8, [%[b], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #144]\n\t" + "str r3, [%[a], #148]\n\t" + "str r4, [%[a], #152]\n\t" + "str r5, [%[a], #156]\n\t" + "ldr r2, [%[a], #160]\n\t" + "ldr r3, [%[a], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "ldr r8, [%[b], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #160]\n\t" + "str r3, [%[a], #164]\n\t" + "str r4, [%[a], #168]\n\t" + "str r5, [%[a], #172]\n\t" + "ldr r2, [%[a], #176]\n\t" + "ldr r3, [%[a], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "ldr r8, [%[b], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #176]\n\t" + "str r3, [%[a], #180]\n\t" + "str r4, [%[a], #184]\n\t" + "str r5, [%[a], #188]\n\t" + "ldr r2, [%[a], #192]\n\t" + "ldr r3, [%[a], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[a], #204]\n\t" + "ldr r6, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "ldr r8, [%[b], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #192]\n\t" + "str r3, [%[a], #196]\n\t" + "str r4, [%[a], #200]\n\t" + "str r5, [%[a], #204]\n\t" + "ldr r2, [%[a], #208]\n\t" + "ldr r3, [%[a], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[a], #220]\n\t" + "ldr r6, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "ldr r8, [%[b], #216]\n\t" + "ldr r9, [%[b], #220]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #208]\n\t" + "str r3, [%[a], #212]\n\t" + "str r4, [%[a], #216]\n\t" + "str r5, [%[a], #220]\n\t" + "ldr r2, [%[a], #224]\n\t" + "ldr r3, [%[a], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[a], #236]\n\t" + "ldr r6, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "ldr r8, [%[b], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #224]\n\t" + "str r3, [%[a], #228]\n\t" + "str r4, [%[a], #232]\n\t" + "str r5, [%[a], #236]\n\t" + "ldr r2, [%[a], #240]\n\t" + "ldr r3, [%[a], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[a], #252]\n\t" + "ldr r6, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "ldr r8, [%[b], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #240]\n\t" + "str r3, [%[a], #244]\n\t" + "str r4, [%[a], #248]\n\t" + "str r5, [%[a], #252]\n\t" + "ldr r2, [%[a], #256]\n\t" + "ldr r3, [%[a], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[a], #268]\n\t" + "ldr r6, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "ldr r8, [%[b], #264]\n\t" + "ldr r9, [%[b], #268]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #256]\n\t" + "str r3, [%[a], #260]\n\t" + "str r4, [%[a], #264]\n\t" + "str r5, [%[a], #268]\n\t" + "ldr r2, [%[a], #272]\n\t" + "ldr r3, [%[a], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[a],
#284]\n\t" + "ldr r6, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "ldr r8, [%[b], #280]\n\t" + "ldr r9, [%[b], #284]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #272]\n\t" + "str r3, [%[a], #276]\n\t" + "str r4, [%[a], #280]\n\t" + "str r5, [%[a], #284]\n\t" + "ldr r2, [%[a], #288]\n\t" + "ldr r3, [%[a], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[a], #300]\n\t" + "ldr r6, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "ldr r8, [%[b], #296]\n\t" + "ldr r9, [%[b], #300]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #288]\n\t" + "str r3, [%[a], #292]\n\t" + "str r4, [%[a], #296]\n\t" + "str r5, [%[a], #300]\n\t" + "ldr r2, [%[a], #304]\n\t" + "ldr r3, [%[a], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[a], #316]\n\t" + "ldr r6, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "ldr r8, [%[b], #312]\n\t" + "ldr r9, [%[b], #316]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #304]\n\t" + "str r3, [%[a], #308]\n\t" + "str r4, [%[a], #312]\n\t" + "str r5, [%[a], #316]\n\t" + "ldr r2, [%[a], #320]\n\t" + "ldr r3, [%[a], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[a], #332]\n\t" + "ldr r6, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "ldr r8, [%[b], #328]\n\t" + "ldr r9, [%[b], #332]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #320]\n\t" + "str r3, [%[a], #324]\n\t" + "str r4, [%[a], #328]\n\t" + "str r5, [%[a], #332]\n\t" + "ldr r2, [%[a], #336]\n\t" + "ldr r3, [%[a], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[a], #348]\n\t" + "ldr r6, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "ldr r8, [%[b], #344]\n\t" + "ldr r9, [%[b], #348]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #336]\n\t" + "str r3, [%[a], #340]\n\t" + "str r4, [%[a], #344]\n\t" + "str r5, [%[a], #348]\n\t" + "ldr r2, [%[a], #352]\n\t" + "ldr r3, [%[a], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[a], #364]\n\t" + "ldr r6, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "ldr r8, [%[b], #360]\n\t" + "ldr r9, [%[b], #364]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #352]\n\t" + "str r3, [%[a], #356]\n\t" + "str r4, [%[a], #360]\n\t" + "str r5, [%[a], #364]\n\t" + "ldr r2, [%[a], #368]\n\t" + "ldr r3, [%[a], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[a], #380]\n\t" + "ldr r6, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "ldr r8, [%[b], #376]\n\t" + "ldr r9, [%[b], #380]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #368]\n\t" + "str r3, [%[a], #372]\n\t" + "str r4, [%[a], #376]\n\t" + "str r5, [%[a], #380]\n\t" + "ldr r2, [%[a], #384]\n\t" + "ldr r3, [%[a], #388]\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r5, [%[a], #396]\n\t" + "ldr r6, [%[b], #384]\n\t" + "ldr r7, [%[b], #388]\n\t" + "ldr r8, [%[b], #392]\n\t" + "ldr r9, [%[b], #396]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #384]\n\t" + "str r3, [%[a], #388]\n\t" + "str r4, [%[a], #392]\n\t" + "str r5, [%[a], #396]\n\t" + "ldr r2, [%[a], #400]\n\t" + "ldr r3, [%[a], #404]\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r5, [%[a], #412]\n\t" + "ldr r6, [%[b], #400]\n\t" + "ldr r7, [%[b], #404]\n\t" + "ldr r8, [%[b], #408]\n\t" + "ldr r9, [%[b], #412]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #400]\n\t" + "str r3, [%[a], #404]\n\t" + "str r4, [%[a], #408]\n\t" + "str r5, [%[a], #412]\n\t" + "ldr r2, [%[a], #416]\n\t" + "ldr r3, [%[a], #420]\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r5, [%[a],
#428]\n\t" + "ldr r6, [%[b], #416]\n\t" + "ldr r7, [%[b], #420]\n\t" + "ldr r8, [%[b], #424]\n\t" + "ldr r9, [%[b], #428]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #416]\n\t" + "str r3, [%[a], #420]\n\t" + "str r4, [%[a], #424]\n\t" + "str r5, [%[a], #428]\n\t" + "ldr r2, [%[a], #432]\n\t" + "ldr r3, [%[a], #436]\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r5, [%[a], #444]\n\t" + "ldr r6, [%[b], #432]\n\t" + "ldr r7, [%[b], #436]\n\t" + "ldr r8, [%[b], #440]\n\t" + "ldr r9, [%[b], #444]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #432]\n\t" + "str r3, [%[a], #436]\n\t" + "str r4, [%[a], #440]\n\t" + "str r5, [%[a], #444]\n\t" + "ldr r2, [%[a], #448]\n\t" + "ldr r3, [%[a], #452]\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r5, [%[a], #460]\n\t" + "ldr r6, [%[b], #448]\n\t" + "ldr r7, [%[b], #452]\n\t" + "ldr r8, [%[b], #456]\n\t" + "ldr r9, [%[b], #460]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #448]\n\t" + "str r3, [%[a], #452]\n\t" + "str r4, [%[a], #456]\n\t" + "str r5, [%[a], #460]\n\t" + "ldr r2, [%[a], #464]\n\t" + "ldr r3, [%[a], #468]\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r5, [%[a], #476]\n\t" + "ldr r6, [%[b], #464]\n\t" + "ldr r7, [%[b], #468]\n\t" + "ldr r8, [%[b], #472]\n\t" + "ldr r9, [%[b], #476]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #464]\n\t" + "str r3, [%[a], #468]\n\t" + "str r4, [%[a], #472]\n\t" + "str r5, [%[a], #476]\n\t" + "ldr r2, [%[a], #480]\n\t" + "ldr r3, [%[a], #484]\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r5, [%[a], #492]\n\t" + "ldr r6, [%[b], #480]\n\t" + "ldr r7, [%[b], #484]\n\t" + "ldr r8, [%[b], #488]\n\t" + "ldr r9, [%[b], #492]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t"
+ "str r2, [%[a], #480]\n\t" + "str r3, [%[a], #484]\n\t" + "str r4, [%[a], #488]\n\t" + "str r5, [%[a], #492]\n\t" + "ldr r2, [%[a], #496]\n\t" + "ldr r3, [%[a], #500]\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r5, [%[a], #508]\n\t" + "ldr r6, [%[b], #496]\n\t" + "ldr r7, [%[b], #500]\n\t" + "ldr r8, [%[b], #504]\n\t" + "ldr r9, [%[b], #508]\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "str r2, [%[a], #496]\n\t" + "str r3, [%[a], #500]\n\t" + "str r4, [%[a], #504]\n\t" + "str r5, [%[a], #508]\n\t" + /* c = r9 - r9 - borrow: 0 without a borrow, all bits set with one. */ + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + /* "cc" is listed because subs/sbcs rewrite the condition flags. */ + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" + "ldr r8, [%[b], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "ldr r10, [%[b], #8]\n\t" + "ldr r14, [%[b], #12]\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" + "ldr r8, [%[b], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "ldr r10, [%[b], #24]\n\t" + "ldr r14, [%[b], #28]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "str r6, [%[r], #24]\n\t" + "str r7, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" + "ldr
r8, [%[b], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "ldr r10, [%[b], #40]\n\t" + "ldr r14, [%[b], #44]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "str r6, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + /* Each group: load four words of a and of b, adcs them, store to r. */ + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" + "ldr r8, [%[b], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "ldr r10, [%[b], #56]\n\t" + "ldr r14, [%[b], #60]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" + "str r6, [%[r], #56]\n\t" + "str r7, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[a], #72]\n\t" + "ldr r7, [%[a], #76]\n\t" + "ldr r8, [%[b], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "ldr r10, [%[b], #72]\n\t" + "ldr r14, [%[b], #76]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" + "str r6, [%[r], #72]\n\t" + "str r7, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[a], #88]\n\t" + "ldr r7, [%[a], #92]\n\t" + "ldr r8, [%[b], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "ldr r10, [%[b], #88]\n\t" + "ldr r14, [%[b], #92]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" + "str r6, [%[r], #88]\n\t" + "str r7, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[a], #104]\n\t" + "ldr r7, [%[a], #108]\n\t" + "ldr r8, [%[b], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "ldr r10, [%[b], #104]\n\t" + "ldr r14, [%[b], #108]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #96]\n\t" + "str r5,
[%[r], #100]\n\t" + "str r6, [%[r], #104]\n\t" + "str r7, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[a], #120]\n\t" + "ldr r7, [%[a], #124]\n\t" + "ldr r8, [%[b], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "ldr r10, [%[b], #120]\n\t" + "ldr r14, [%[b], #124]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" + "str r6, [%[r], #120]\n\t" + "str r7, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[a], #136]\n\t" + "ldr r7, [%[a], #140]\n\t" + "ldr r8, [%[b], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "ldr r10, [%[b], #136]\n\t" + "ldr r14, [%[b], #140]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" + "str r6, [%[r], #136]\n\t" + "str r7, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[a], #152]\n\t" + "ldr r7, [%[a], #156]\n\t" + "ldr r8, [%[b], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "ldr r10, [%[b], #152]\n\t" + "ldr r14, [%[b], #156]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" + "str r6, [%[r], #152]\n\t" + "str r7, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[a], #168]\n\t" + "ldr r7, [%[a], #172]\n\t" + "ldr r8, [%[b], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "ldr r10, [%[b], #168]\n\t" + "ldr r14, [%[b], #172]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" + "str r6, [%[r], #168]\n\t" + "str r7, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[a], #184]\n\t" + "ldr r7, [%[a], #188]\n\t" + "ldr r8,
[%[b], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "ldr r10, [%[b], #184]\n\t" + "ldr r14, [%[b], #188]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" + "str r6, [%[r], #184]\n\t" + "str r7, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[a], #200]\n\t" + "ldr r7, [%[a], #204]\n\t" + "ldr r8, [%[b], #192]\n\t" + "ldr r9, [%[b], #196]\n\t" + "ldr r10, [%[b], #200]\n\t" + "ldr r14, [%[b], #204]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" + "str r6, [%[r], #200]\n\t" + "str r7, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[a], #216]\n\t" + "ldr r7, [%[a], #220]\n\t" + "ldr r8, [%[b], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "ldr r10, [%[b], #216]\n\t" + "ldr r14, [%[b], #220]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" + "str r6, [%[r], #216]\n\t" + "str r7, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[a], #232]\n\t" + "ldr r7, [%[a], #236]\n\t" + "ldr r8, [%[b], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "ldr r10, [%[b], #232]\n\t" + "ldr r14, [%[b], #236]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" + "str r6, [%[r], #232]\n\t" + "str r7, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[a], #248]\n\t" + "ldr r7, [%[a], #252]\n\t" + "ldr r8, [%[b], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "ldr r10, [%[b], #248]\n\t" + "ldr r14, [%[b], #252]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" +
"str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" + "str r6, [%[r], #248]\n\t" + "str r7, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[a], #260]\n\t" + "ldr r6, [%[a], #264]\n\t" + "ldr r7, [%[a], #268]\n\t" + "ldr r8, [%[b], #256]\n\t" + "ldr r9, [%[b], #260]\n\t" + "ldr r10, [%[b], #264]\n\t" + "ldr r14, [%[b], #268]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #256]\n\t" + "str r5, [%[r], #260]\n\t" + "str r6, [%[r], #264]\n\t" + "str r7, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[a], #276]\n\t" + "ldr r6, [%[a], #280]\n\t" + "ldr r7, [%[a], #284]\n\t" + "ldr r8, [%[b], #272]\n\t" + "ldr r9, [%[b], #276]\n\t" + "ldr r10, [%[b], #280]\n\t" + "ldr r14, [%[b], #284]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #272]\n\t" + "str r5, [%[r], #276]\n\t" + "str r6, [%[r], #280]\n\t" + "str r7, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r5, [%[a], #292]\n\t" + "ldr r6, [%[a], #296]\n\t" + "ldr r7, [%[a], #300]\n\t" + "ldr r8, [%[b], #288]\n\t" + "ldr r9, [%[b], #292]\n\t" + "ldr r10, [%[b], #296]\n\t" + "ldr r14, [%[b], #300]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #288]\n\t" + "str r5, [%[r], #292]\n\t" + "str r6, [%[r], #296]\n\t" + "str r7, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[a], #308]\n\t" + "ldr r6, [%[a], #312]\n\t" + "ldr r7, [%[a], #316]\n\t" + "ldr r8, [%[b], #304]\n\t" + "ldr r9, [%[b], #308]\n\t" + "ldr r10, [%[b], #312]\n\t" + "ldr r14, [%[b], #316]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #304]\n\t" + "str r5, [%[r], #308]\n\t" + "str r6, [%[r], #312]\n\t" + "str r7, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[a], #324]\n\t" + "ldr r6, [%[a], #328]\n\t" + "ldr
r7, [%[a], #332]\n\t" + "ldr r8, [%[b], #320]\n\t" + "ldr r9, [%[b], #324]\n\t" + "ldr r10, [%[b], #328]\n\t" + "ldr r14, [%[b], #332]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #320]\n\t" + "str r5, [%[r], #324]\n\t" + "str r6, [%[r], #328]\n\t" + "str r7, [%[r], #332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[a], #340]\n\t" + "ldr r6, [%[a], #344]\n\t" + "ldr r7, [%[a], #348]\n\t" + "ldr r8, [%[b], #336]\n\t" + "ldr r9, [%[b], #340]\n\t" + "ldr r10, [%[b], #344]\n\t" + "ldr r14, [%[b], #348]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #336]\n\t" + "str r5, [%[r], #340]\n\t" + "str r6, [%[r], #344]\n\t" + "str r7, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[a], #356]\n\t" + "ldr r6, [%[a], #360]\n\t" + "ldr r7, [%[a], #364]\n\t" + "ldr r8, [%[b], #352]\n\t" + "ldr r9, [%[b], #356]\n\t" + "ldr r10, [%[b], #360]\n\t" + "ldr r14, [%[b], #364]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #352]\n\t" + "str r5, [%[r], #356]\n\t" + "str r6, [%[r], #360]\n\t" + "str r7, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[a], #372]\n\t" + "ldr r6, [%[a], #376]\n\t" + "ldr r7, [%[a], #380]\n\t" + "ldr r8, [%[b], #368]\n\t" + "ldr r9, [%[b], #372]\n\t" + "ldr r10, [%[b], #376]\n\t" + "ldr r14, [%[b], #380]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #368]\n\t" + "str r5, [%[r], #372]\n\t" + "str r6, [%[r], #376]\n\t" + "str r7, [%[r], #380]\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r5, [%[a], #388]\n\t" + "ldr r6, [%[a], #392]\n\t" + "ldr r7, [%[a], #396]\n\t" + "ldr r8, [%[b], #384]\n\t" + "ldr r9, [%[b], #388]\n\t" + "ldr r10, [%[b], #392]\n\t" + "ldr r14, [%[b], #396]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6,
r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #384]\n\t" + "str r5, [%[r], #388]\n\t" + "str r6, [%[r], #392]\n\t" + "str r7, [%[r], #396]\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r5, [%[a], #404]\n\t" + "ldr r6, [%[a], #408]\n\t" + "ldr r7, [%[a], #412]\n\t" + "ldr r8, [%[b], #400]\n\t" + "ldr r9, [%[b], #404]\n\t" + "ldr r10, [%[b], #408]\n\t" + "ldr r14, [%[b], #412]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #400]\n\t" + "str r5, [%[r], #404]\n\t" + "str r6, [%[r], #408]\n\t" + "str r7, [%[r], #412]\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r5, [%[a], #420]\n\t" + "ldr r6, [%[a], #424]\n\t" + "ldr r7, [%[a], #428]\n\t" + "ldr r8, [%[b], #416]\n\t" + "ldr r9, [%[b], #420]\n\t" + "ldr r10, [%[b], #424]\n\t" + "ldr r14, [%[b], #428]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #416]\n\t" + "str r5, [%[r], #420]\n\t" + "str r6, [%[r], #424]\n\t" + "str r7, [%[r], #428]\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r5, [%[a], #436]\n\t" + "ldr r6, [%[a], #440]\n\t" + "ldr r7, [%[a], #444]\n\t" + "ldr r8, [%[b], #432]\n\t" + "ldr r9, [%[b], #436]\n\t" + "ldr r10, [%[b], #440]\n\t" + "ldr r14, [%[b], #444]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #432]\n\t" + "str r5, [%[r], #436]\n\t" + "str r6, [%[r], #440]\n\t" + "str r7, [%[r], #444]\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r5, [%[a], #452]\n\t" + "ldr r6, [%[a], #456]\n\t" + "ldr r7, [%[a], #460]\n\t" + "ldr r8, [%[b], #448]\n\t" + "ldr r9, [%[b], #452]\n\t" + "ldr r10, [%[b], #456]\n\t" + "ldr r14, [%[b], #460]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #448]\n\t" + "str r5, [%[r], #452]\n\t" + "str r6, [%[r], #456]\n\t" + "str r7, [%[r], #460]\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r5, [%[a], #468]\n\t"
+ "ldr r6, [%[a], #472]\n\t" + "ldr r7, [%[a], #476]\n\t" + "ldr r8, [%[b], #464]\n\t" + "ldr r9, [%[b], #468]\n\t" + "ldr r10, [%[b], #472]\n\t" + "ldr r14, [%[b], #476]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #464]\n\t" + "str r5, [%[r], #468]\n\t" + "str r6, [%[r], #472]\n\t" + "str r7, [%[r], #476]\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r5, [%[a], #484]\n\t" + "ldr r6, [%[a], #488]\n\t" + "ldr r7, [%[a], #492]\n\t" + "ldr r8, [%[b], #480]\n\t" + "ldr r9, [%[b], #484]\n\t" + "ldr r10, [%[b], #488]\n\t" + "ldr r14, [%[b], #492]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #480]\n\t" + "str r5, [%[r], #484]\n\t" + "str r6, [%[r], #488]\n\t" + "str r7, [%[r], #492]\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r5, [%[a], #500]\n\t" + "ldr r6, [%[a], #504]\n\t" + "ldr r7, [%[a], #508]\n\t" + "ldr r8, [%[b], #496]\n\t" + "ldr r9, [%[b], #500]\n\t" + "ldr r10, [%[b], #504]\n\t" + "ldr r14, [%[b], #508]\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r], #496]\n\t" + "str r5, [%[r], #500]\n\t" + "str r6, [%[r], #504]\n\t" + "str r7, [%[r], #508]\n\t" + /* r12 was zeroed at the top of the asm, so c = 0 + 0 + carry flag. */ + "adc %[c], r12, r12\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + /* "cc" is listed because adds/adcs rewrite the condition flags. */ + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "cc" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + __asm__ __volatile__ ( + "sub sp, sp, #256\n\t" + "mov r10, #0\n\t" + "# A[0] * B[0]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r3, r4, r8, r9\n\t" + "mov r5, #0\n\t" + "str r3, [sp]\n\t" + "# A[0] * B[1]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[0]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #4]\n\t" + "# A[0] * B[2]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[1]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[0]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #8]\n\t" + "# A[0] * B[3]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[2]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[1]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[0]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #12]\n\t" + "# A[0] * B[4]\n\t" + 
"ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[3]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[2]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[1]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[0]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #16]\n\t" + "# A[0] * B[5]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[4]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[3]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[2]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[1]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[0]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #20]\n\t" + "# A[0] * 
B[6]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[5]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[4]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[3]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[2]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[1]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[0]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #24]\n\t" + "# A[0] * B[7]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[6]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[5]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[4]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[3]\n\t" + 
"ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[2]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[1]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[0]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #28]\n\t" + "# A[0] * B[8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[7]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[6]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[5]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[4]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[3]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[2]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[1]\n\t" + "ldr r8, 
[%[a], #28]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[0]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #32]\n\t" + "# A[0] * B[9]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[7]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[6]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[5]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[4]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[3]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[2]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[1]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[0]\n\t" + "ldr r8, [%[a], 
#36]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #36]\n\t" + "# A[0] * B[10]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[9]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[8]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[7]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[6]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[5]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[4]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[3]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[2]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[1]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[0]\n\t" + "ldr r8, [%[a], 
#40]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #40]\n\t" + "# A[0] * B[11]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[10]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[9]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[8]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[7]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[6]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[5]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[4]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[3]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[2]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[1]\n\t" + "ldr r8, [%[a], 
#40]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[0]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #44]\n\t" + "# A[0] * B[12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[11]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[10]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[9]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[8]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[7]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[6]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[5]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[4]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[3]\n\t" + "ldr r8, [%[a], 
#36]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[2]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[1]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[0]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #48]\n\t" + "# A[0] * B[13]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[11]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[10]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[9]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[8]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[7]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[6]\n\t" + "ldr r8, [%[a], 
#28]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[5]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[4]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[3]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[2]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[1]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[0]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #52]\n\t" + "# A[0] * B[14]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[13]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[11]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[10]\n\t" + "ldr r8, [%[a], 
#16]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[9]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[8]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[7]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[6]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[5]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[4]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[3]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[2]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[1]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[0]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #56]\n\t" + "# A[0] * B[15]\n\t" + "ldr r8, [%[a], 
#0]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[14]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[13]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[12]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[11]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[10]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[9]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[8]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[7]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[6]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[5]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[4]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, 
[%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[3]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[2]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[1]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[0]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #60]\n\t" + "# A[0] * B[16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[15]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[14]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[13]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[12]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[11]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[10]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr 
r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[9]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[8]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[7]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[6]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[5]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[4]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[3]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[2]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[1]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[0]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #64]\n\t" + "# A[0] * B[17]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr 
r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[15]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[14]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[13]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[12]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[11]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[10]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[9]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[8]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[7]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[6]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #24]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[5]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[4]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[3]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[2]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[1]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[0]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #68]\n\t" + "# A[0] * B[18]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[17]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[15]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[14]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #56]\n\t" 
+ "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[13]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[12]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[11]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[10]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[9]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[8]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[7]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[6]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[5]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[4]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[3]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[2]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[1]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[0]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #72]\n\t" + "# A[0] * B[19]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[18]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[17]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[15]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[14]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[13]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[12]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[11]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[10]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[9]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[8]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[7]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[6]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[5]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[4]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[3]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[2]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[1]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[0]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #76]\n\t" + "# A[0] * B[20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[19]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[18]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[17]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[16]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[15]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[14]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[13]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[12]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[11]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[10]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[9]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[8]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[7]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[6]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[5]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[4]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[3]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[2]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[1]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[0]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" 
+ "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #80]\n\t" + "# A[0] * B[21]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[19]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[18]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[17]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[16]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[15]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[14]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[13]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[12]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[11]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[10]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[9]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[8]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[7]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[6]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[5]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[4]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[3]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[2]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[1]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[0]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #84]\n\t" + "# A[0] * B[22]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[21]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[19]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[18]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[17]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[16]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[15]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[14]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[13]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[12]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[11]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[10]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[9]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[8]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[7]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[6]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[5]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[4]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[3]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[2]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[1]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[0]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #88]\n\t" + "# A[0] * B[23]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[22]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[21]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[20]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[19]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[18]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[17]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[16]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[15]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[14]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, 
r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[13]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[12]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[11]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[10]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[9]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[8]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[7]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[6]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[5]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[4]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[3]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[21] * B[2]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[1]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[0]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #92]\n\t" + "# A[0] * B[24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[23]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[22]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[21]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[19]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[18]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[17]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" 
+ "adc r5, r5, r10\n\t" + "# A[8] * B[16]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[15]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[14]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[13]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[12]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[11]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[10]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[9]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[8]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[7]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[6]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[19] * B[5]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[4]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[3]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[2]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[1]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[0]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #96]\n\t" + "# A[0] * B[25]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[23]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[22]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[21]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, r10\n\t" + "# A[5] * B[20]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[19]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[18]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[17]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[16]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[15]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[14]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[13]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[12]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[11]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[10]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
+ "# A[16] * B[9]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[8]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[7]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[6]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[5]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[4]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[3]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[2]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[1]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[0]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #100]\n\t" + "# A[0] * B[26]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, 
r10\n\t" + "# A[1] * B[25]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[23]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[22]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[21]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[20]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[19]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[18]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[17]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[16]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[15]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[12] * B[14]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[13]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[12]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[11]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[10]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[9]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[8]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[7]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[6]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[5]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[4]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * 
B[3]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[2]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[1]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[0]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #104]\n\t" + "# A[0] * B[27]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[26]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[25]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[23]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[22]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[21]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# 
A[7] * B[20]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[19]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[18]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[17]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[16]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[15]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[14]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[13]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[12]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[11]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[10]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * 
B[9]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[8]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[7]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[6]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[5]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[4]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[3]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[2]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[1]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[0]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #108]\n\t" + "# A[0] * B[28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# 
A[1] * B[27]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[26]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[25]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[23]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[22]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[21]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[20]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[19]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[18]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[17]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * 
B[16]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[15]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[14]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[13]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[12]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[11]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[10]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[9]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[8]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[7]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[6]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[5]\n\t" + 
"ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[4]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[3]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[2]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[1]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[0]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #112]\n\t" + "# A[0] * B[29]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[27]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[26]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[25]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * 
B[24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[23]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[22]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[21]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[20]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[19]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[18]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[17]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[16]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[15]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[14]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[13]\n\t" + 
"ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[12]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[11]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[10]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[9]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[8]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[7]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[6]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[5]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[4]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[3]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[2]\n\t" + "ldr r8, 
[%[a], #108]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[1]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[0]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #116]\n\t" + "# A[0] * B[30]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[29]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[27]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[26]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[25]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[24]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[23]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[22]\n\t" 
+ "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[21]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[20]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[19]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[18]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[17]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[16]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[15]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[14]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[13]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[12]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[11]\n\t" + "ldr 
r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[10]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[9]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[8]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[7]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[6]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[5]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[4]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[3]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[2]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[1]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[0]\n\t" + "ldr r8, [%[a], 
#120]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #120]\n\t" + "# A[0] * B[31]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[30]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[29]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[27]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[26]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[25]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[24]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[23]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[22]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[21]\n\t" + 
"ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[20]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[19]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[18]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[17]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[16]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[15]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[14]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[13]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[12]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[11]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[10]\n\t" + "ldr r8, 
[%[a], #84]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[9]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[8]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[7]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[6]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[5]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[4]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[3]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[2]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[1]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[0]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #124]\n\t" + "# A[0] * B[32]\n\t" 
+ "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[31]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[30]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[29]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[27]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[26]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[25]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[24]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[23]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[22]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[21]\n\t" + "ldr r8, 
[%[a], #44]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[20]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[19]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[18]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[17]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[16]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[15]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[14]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[13]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[12]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[11]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[10]\n\t" + "ldr r8, [%[a], 
#88]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[9]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[8]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[7]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[6]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[5]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[4]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[3]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[2]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[1]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[0]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #128]\n\t" + "# A[0] * B[33]\n\t" + 
"ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[31]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[30]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[29]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[27]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[26]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[25]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[24]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[23]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[22]\n\t" + "ldr r8, 
[%[a], #44]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[21]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[20]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[19]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[18]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[17]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[16]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[15]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[14]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[13]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[12]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[11]\n\t" + "ldr r8, [%[a], 
#88]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[10]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[9]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[8]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[7]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[6]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[5]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[4]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[3]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[2]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[1]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[0]\n\t" + "ldr r8, [%[a], #132]\n\t" + 
"ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #132]\n\t" + "# A[0] * B[34]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[33]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[31]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[30]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[29]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[27]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[26]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[25]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[24]\n\t" + "ldr r8, 
[%[a], #40]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[23]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[22]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[21]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[20]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[19]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[18]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[17]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[16]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[15]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[14]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[13]\n\t" + "ldr r8, [%[a], 
#84]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[12]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[11]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[10]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[9]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[8]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[7]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[6]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[5]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[4]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[3]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[2]\n\t" + "ldr r8, [%[a], 
#128]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[1]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[0]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #136]\n\t" + "# A[0] * B[35]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[34]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[33]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[31]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[30]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[29]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[28]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[27]\n\t" + 
"ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[26]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[25]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[24]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[23]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[22]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[21]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[20]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[19]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[18]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[17]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[16]\n\t" + "ldr 
r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[15]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[14]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[13]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[12]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[11]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[10]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[9]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[8]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[7]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[6]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[5]\n\t" + "ldr r8, 
[%[a], #120]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[4]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[3]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[2]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[1]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[0]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #140]\n\t" + "# A[0] * B[36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[35]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[34]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[33]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[32]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * 
B[31]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[30]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[29]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[28]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[27]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[26]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[25]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[24]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[23]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[22]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[21]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * 
B[20]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[19]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[18]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[17]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[16]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[15]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[14]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[13]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[12]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[11]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[10]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * 
B[9]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[8]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[7]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[6]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[5]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[4]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[3]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[2]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[1]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[0]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #144]\n\t" + "# A[0] * B[37]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, 
r10\n\t" + "# A[1] * B[36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[35]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[34]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[33]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[31]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[30]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[29]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[28]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[27]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[26]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
+ "# A[12] * B[25]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[24]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[23]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[22]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[21]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[20]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[19]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[18]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[17]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[16]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[15]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[23] * B[14]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[13]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[12]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[11]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[10]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[9]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[8]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[7]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[6]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[5]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[4]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] 
* B[3]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[2]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[1]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[0]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #148]\n\t" + "# A[0] * B[38]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[37]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[35]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[34]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[33]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[7] * B[31]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[30]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[29]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[28]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[27]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[26]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[25]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[24]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[23]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[22]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[21]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[18] * B[20]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[19]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[18]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[17]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[16]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[15]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[14]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[13]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[12]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[11]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[10]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[29] * B[9]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[8]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[7]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[6]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[5]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[4]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[3]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[2]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[1]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[0]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #152]\n\t" + "# A[0] * B[39]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" 
+ "adc r5, r10, r10\n\t" + "# A[1] * B[38]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[37]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[35]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[34]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[33]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[31]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[30]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[29]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[28]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, r10\n\t" + "# A[12] * B[27]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[26]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[25]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[24]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[23]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[22]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[21]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[20]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[19]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[18]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[17]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, 
r5, r10\n\t" + "# A[23] * B[16]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[15]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[14]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[13]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[12]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[11]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[10]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[9]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[8]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[7]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[6]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[34] * B[5]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[4]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[3]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[2]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[1]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[0]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #156]\n\t" + "# A[0] * B[40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[39]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[38]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[37]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[5] * B[35]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[34]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[33]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[32]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[31]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[30]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[29]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[28]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[27]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[26]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[25]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[16] * B[24]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[23]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[22]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[21]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[20]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[19]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[18]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[17]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[16]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[15]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[14]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, r10\n\t" + "# A[27] * B[13]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[12]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[11]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[10]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[9]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[8]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[7]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[6]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[5]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[4]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[3]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, r10\n\t" + "# A[38] * B[2]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[1]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[0]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #160]\n\t" + "# A[0] * B[41]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[39]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[38]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[37]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[35]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[34]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[33]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[32]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[31]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[30]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[29]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[28]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[27]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[26]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[25]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[24]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[23]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[22]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[21]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[20]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[19]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[18]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[17]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[16]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[15]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[14]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[13]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[12]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[11]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[10]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[9]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[8]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[7]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[6]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[5]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[4]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[3]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[2]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[1]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" 
+ "adc r4, r4, r10\n\t" + "# A[41] * B[0]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #164]\n\t" + "# A[0] * B[42]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[41]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[39]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[38]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[37]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[35]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[34]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[33]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[32]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[31]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[30]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[29]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[28]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[27]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[26]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[25]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[24]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[23]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[22]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[21]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[20]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[19]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[18]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[17]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[16]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[15]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[14]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[13]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[12]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[11]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[10]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[9]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[8]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[7]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[6]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[5]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[4]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[3]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[2]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[1]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[0]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #168]\n\t" + "# A[0] * B[43]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[42]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[41]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[39]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[38]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[37]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[35]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[34]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[33]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[32]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[31]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[30]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[29]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[28]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[27]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[26]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[25]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[24]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[23]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[22]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[21]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[20]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[19]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[18]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[17]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[16]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[15]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[14]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[13]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[12]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[11]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[10]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[9]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[8]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[7]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[6]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[5]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[4]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[3]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[2]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[1]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[0]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #172]\n\t" + "# A[0] * B[44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[43]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[42]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[41]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[39]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[38]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[37]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[35]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[34]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[33]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[32]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[31]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[30]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[29]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[28]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[27]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[26]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[25]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[24]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[23]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[22]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[21]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[20]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[19]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[18]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[17]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[16]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[15]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[14]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[13]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[12]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[11]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[10]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[9]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[8]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[7]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[6]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[5]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[4]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[3]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[2]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[1]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[0]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #176]\n\t" + "# A[0] * B[45]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[43]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[42]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[41]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[39]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[38]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[37]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[36]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" 
+ "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[35]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[34]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[33]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[32]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[31]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[30]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[29]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[28]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[27]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[26]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[25]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[24]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[23]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[22]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[21]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[20]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[19]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[18]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[17]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[16]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[15]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[14]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[13]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[12]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[11]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[10]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[9]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[8]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[7]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[6]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[5]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[4]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[3]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[2]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[1]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[0]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #180]\n\t" + "# A[0] * B[46]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[45]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[43]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[42]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[41]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[39]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #156]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[38]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[37]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[36]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[35]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[34]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[33]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[32]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[31]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[30]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[29]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[28]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #112]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[27]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[26]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[25]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[24]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[23]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[22]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[21]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[20]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[19]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[18]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[17]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #68]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[16]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[15]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[14]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[13]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[12]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[11]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[10]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[9]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[8]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[7]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[6]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #24]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[5]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[4]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[3]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[2]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[1]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[0]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #184]\n\t" + "# A[0] * B[47]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[46]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[45]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[43]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], 
#172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[42]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[41]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[39]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[38]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[37]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[36]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[35]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[34]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[33]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[32]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], 
#128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[31]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[30]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[29]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[28]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[27]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[26]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[25]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[24]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[23]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[22]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[21]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, 
[%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[20]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[19]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[18]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[17]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[16]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[15]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[14]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[13]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[12]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[11]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[10]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr 
r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[9]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[8]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[7]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[6]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[5]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[4]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[3]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[2]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[1]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[0]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #188]\n\t" + "# A[0] * B[48]\n\t" + "ldr r8, [%[a], 
#0]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[47]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[46]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[45]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[43]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[42]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[41]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[39]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[38]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[37]\n\t" + "ldr r8, [%[a], 
#44]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[36]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[35]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[34]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[33]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[32]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[31]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[30]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[29]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[28]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[27]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[26]\n\t" + "ldr r8, 
[%[a], #88]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[25]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[24]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[23]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[22]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[21]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[20]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[19]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[18]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[17]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[16]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[15]\n\t" + "ldr 
r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[14]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[13]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[12]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[11]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[10]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[9]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[8]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[7]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[6]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[5]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[4]\n\t" + "ldr r8, 
[%[a], #176]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[3]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[2]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[1]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[0]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #192]\n\t" + "# A[0] * B[49]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[48]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[47]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[46]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[45]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * 
B[43]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[42]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[41]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[39]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[38]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[37]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[36]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[35]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[34]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[33]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * 
B[32]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[31]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[30]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[29]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[28]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[27]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[26]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[25]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[24]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[23]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[22]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] 
* B[21]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[20]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[19]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[18]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[17]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[16]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[15]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[14]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[13]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[12]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[11]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[39] * B[10]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[9]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[8]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[7]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[6]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[5]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[4]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[3]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[2]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[1]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[0]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, 
#196]\n\t" + "# A[0] * B[50]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[49]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[48]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[47]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[46]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[45]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[43]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[42]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[41]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[40]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[11] * B[39]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[38]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[37]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[36]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[35]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[34]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[33]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[32]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[31]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[30]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[29]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, r10\n\t" + "# A[22] * B[28]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[27]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[26]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[25]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[24]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[23]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[22]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[21]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[20]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[19]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[18]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[33] * B[17]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[16]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[15]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[14]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[13]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[12]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[11]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[10]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[9]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[8]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[7]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[44] * B[6]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[5]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[4]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[3]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[2]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[1]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[0]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #200]\n\t" + "# A[0] * B[51]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[50]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[49]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[48]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[47]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[46]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[45]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[43]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[42]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[41]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[40]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[39]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[38]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[37]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[36]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[35]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[34]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[33]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[32]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[31]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[30]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[29]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[28]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[27]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[26]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[25]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[24]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[23]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[22]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[21]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[20]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[19]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[18]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[17]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[16]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[15]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[14]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[13]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[12]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[11]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[10]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[9]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[8]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[7]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[6]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[5]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[4]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[3]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[2]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[1]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[0]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #204]\n\t" + "# A[0] * B[52]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[51]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[50]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[49]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[48]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[47]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[46]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[45]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[43]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[42]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[41]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[40]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[39]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[38]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[37]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[36]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[35]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[34]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[33]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[32]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[31]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[30]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[29]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[28]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[27]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[26]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[25]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[24]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[23]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[22]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[21]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[20]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[19]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[18]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[17]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[16]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[15]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[14]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[13]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[12]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[11]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[10]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[9]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[8]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[7]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[6]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[5]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[4]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[3]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[2]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[1]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[0]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #208]\n\t" + "# A[0] * B[53]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[52]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[51]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[50]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[49]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[48]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[47]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[46]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[45]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[44]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[43]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[42]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[41]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[40]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[39]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[38]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[37]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[36]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[35]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[34]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[33]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[32]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[31]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[30]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[29]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[28]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[27]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[26]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[25]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[24]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[23]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[22]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[21]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[20]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[19]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[18]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[17]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[16]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[15]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[14]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[13]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[12]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #48]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[11]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[10]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[9]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[8]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[7]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[6]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[5]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[4]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[3]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[2]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[1]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[0]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #212]\n\t" + "# A[0] * B[54]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[53]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[52]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[51]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[50]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[49]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[48]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[47]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[46]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[45]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], 
#180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[43]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[42]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[41]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[40]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[39]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[38]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[37]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[36]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[35]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[34]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, 
[%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[33]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[32]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[31]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[30]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[29]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[28]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[27]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[26]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[25]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[24]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[23]\n\t" + "ldr r8, [%[a], #124]\n\t" + 
"ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[22]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[21]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[20]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[19]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[18]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[17]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[16]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[15]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[14]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[13]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[12]\n\t" + "ldr r8, [%[a], #168]\n\t" 
+ "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[11]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[10]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[9]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[8]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[7]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[6]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[5]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[4]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[3]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[2]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[1]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr 
r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[0]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #216]\n\t" + "# A[0] * B[55]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[54]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[53]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[52]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[51]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[50]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[49]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[48]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[47]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[46]\n\t" + "ldr r8, [%[a], 
#36]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[45]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[44]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[43]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[42]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[41]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[40]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[39]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[38]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[37]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[36]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[35]\n\t" + "ldr r8, 
[%[a], #80]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[34]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[33]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[32]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[31]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[30]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[29]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[28]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[27]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[26]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[25]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[24]\n\t" + 
"ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[23]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[22]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[21]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[20]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[19]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[18]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[17]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[16]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[15]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[14]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[13]\n\t" 
+ "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[12]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[11]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[10]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[9]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[8]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[7]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[6]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[5]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[4]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[3]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[2]\n\t" + 
"ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[1]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[0]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #220]\n\t" + "# A[0] * B[56]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[55]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[54]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[53]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[52]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[51]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[50]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[49]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * 
B[48]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[47]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[46]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[45]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[44]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[43]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[42]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[41]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[40]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[39]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[38]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] 
* B[37]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[36]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[35]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[34]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[33]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[32]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[31]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[30]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[29]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[28]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[27]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + 
"# A[30] * B[26]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[25]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[24]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[23]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[22]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[21]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[20]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[19]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[18]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[17]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[16]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[41] * B[15]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[14]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[13]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[12]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[11]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[10]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[9]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[8]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[7]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[6]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[5]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[52] * B[4]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[3]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[2]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[1]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[0]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #224]\n\t" + "# A[0] * B[57]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[56]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[55]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[54]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[53]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[52]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, r10\n\t" + "# A[6] * B[51]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[50]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[49]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[48]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[47]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[46]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[45]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[44]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[43]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[42]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[41]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, r10\n\t" + "# A[17] * B[40]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[39]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[38]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[37]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[36]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[35]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[34]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[33]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[32]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[31]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[30]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[29]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[28]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[27]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[26]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[25]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[24]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[23]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[22]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[21]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[20]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[19]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[18]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[17]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[16]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[15]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[14]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[13]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[12]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[11]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[10]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[9]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[8]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[7]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[6]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[5]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[4]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[3]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[2]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[1]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[0]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #228]\n\t" + "# A[0] * B[58]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[57]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[56]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[55]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[54]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[53]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[52]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[51]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[50]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[49]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[48]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[47]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[46]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[45]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[44]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[43]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[42]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[41]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[40]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[39]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[38]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[37]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[36]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[35]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[34]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[33]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[32]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[31]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[30]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[29]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[28]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[27]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[26]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[25]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[24]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[23]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" 
+ "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[22]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[21]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[20]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[19]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[18]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[17]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[16]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[15]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[14]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[13]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[12]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[11]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[10]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[9]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[8]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[7]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[6]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[5]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[4]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[3]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[2]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[1]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" 
+ "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[0]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #232]\n\t" + "# A[0] * B[59]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[58]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[57]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[56]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[55]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[54]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[53]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[52]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[51]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[50]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[49]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[48]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[47]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[46]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[45]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[44]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[43]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[42]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[41]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[40]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[39]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[38]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[37]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[36]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[35]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[34]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[33]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[32]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[31]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[30]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[29]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[28]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #112]\n\t" 
+ "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[27]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[26]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[25]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[24]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[23]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[22]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[21]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[20]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[19]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[18]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[17]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], 
#68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[16]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[15]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[14]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[13]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[12]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[11]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[10]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[9]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[8]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[7]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[6]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], 
#24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[5]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[4]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[3]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[2]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[1]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[0]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #236]\n\t" + "# A[0] * B[60]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[59]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[58]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[57]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[56]\n\t" + "ldr r8, [%[a], #16]\n\t" + 
"ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[55]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[54]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[53]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[52]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[51]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[50]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[49]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[48]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[47]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[46]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[45]\n\t" + "ldr r8, [%[a], #60]\n\t" + 
"ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[44]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[43]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[42]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[41]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[40]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[39]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[38]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[37]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[36]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[35]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[34]\n\t" + "ldr r8, [%[a], 
#104]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[33]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[32]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[31]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[30]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[29]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[28]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[27]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[26]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[25]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[24]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[23]\n\t" + 
"ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[22]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[21]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[20]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[19]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[18]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[17]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[16]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[15]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[14]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[13]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[12]\n\t" 
+ "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[11]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[10]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[9]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[8]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[7]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[6]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[5]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[4]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[3]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[2]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[1]\n\t" + "ldr 
r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[0]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #240]\n\t" + "# A[0] * B[61]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[1] * B[60]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[2] * B[59]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[58]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[57]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[56]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[55]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[54]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[53]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * 
B[52]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[51]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[50]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[49]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[48]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[47]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[46]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[45]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[44]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[43]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[42]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] 
* B[41]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[40]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[39]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[38]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[37]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[36]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[35]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[34]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[33]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[32]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[31]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + 
"# A[31] * B[30]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[29]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[28]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[27]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[26]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[25]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[24]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[23]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[22]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[21]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[20]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[42] * B[19]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[18]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[17]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[16]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[15]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[14]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[13]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[12]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[11]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[10]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[9]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, r10\n\t" + "# A[53] * B[8]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[7]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[6]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[5]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[4]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[3]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[2]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[1]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[0]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [sp, #244]\n\t" + "# A[0] * B[62]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[1] * B[61]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[2] * B[60]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[3] * B[59]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[58]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[57]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[56]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[55]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[54]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[53]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[52]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[51]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[50]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[49]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[48]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[47]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[46]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[45]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[44]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[43]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[42]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[41]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[40]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[39]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, 
r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[38]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[37]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[36]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[35]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[34]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[33]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[32]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[31]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[30]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[29]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[28]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" 
+ "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[27]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[26]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[25]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[24]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[23]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[22]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[21]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[20]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[19]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[18]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[17]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[16]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[15]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[14]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[13]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[12]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[11]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[10]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[9]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[8]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[7]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[6]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[5]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[4]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[3]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[2]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[1]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[0]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [sp, #248]\n\t" + "# A[0] * B[63]\n\t" + "ldr r8, [%[a], #0]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[1] * B[62]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[2] * B[61]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[3] * B[60]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[4] * B[59]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[58]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[57]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[56]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[55]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[54]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[53]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[52]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[51]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[50]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[49]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[48]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[47]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[46]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[45]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[44]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[43]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[42]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[41]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[40]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[39]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[38]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[37]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[36]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[35]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[34]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[33]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[32]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[31]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[30]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[29]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[28]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[27]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[26]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #104]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[25]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[24]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[23]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[22]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[21]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[20]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[19]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[18]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[17]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[16]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[15]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], 
#60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[14]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[13]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[12]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[11]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[10]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[9]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[8]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[7]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[6]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[5]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[4]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], 
#16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[3]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[2]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[1]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[0]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #0]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [sp, #252]\n\t" + "# A[1] * B[63]\n\t" + "ldr r8, [%[a], #4]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[2] * B[62]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[3] * B[61]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[4] * B[60]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[5] * B[59]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[58]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[57]\n\t" + "ldr r8, [%[a], #28]\n\t" + 
"ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[56]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[55]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[54]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[53]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[52]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[51]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[50]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[49]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[48]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[47]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[46]\n\t" + "ldr r8, [%[a], #72]\n\t" + 
"ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[45]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[44]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[43]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[42]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[41]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[40]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[39]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[38]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[37]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[36]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[35]\n\t" + "ldr r8, [%[a], 
#116]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[34]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[33]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[32]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[31]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[30]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[29]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[28]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[27]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[26]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[25]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[24]\n\t" + 
"ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[23]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[22]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[21]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[20]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[19]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[18]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[17]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[16]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[15]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[14]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[13]\n\t" 
+ "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[12]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[11]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[10]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[9]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[8]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[7]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[6]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[5]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[4]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[3]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[2]\n\t" + 
"ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[1]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #4]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[2] * B[63]\n\t" + "ldr r8, [%[a], #8]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[3] * B[62]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[4] * B[61]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[5] * B[60]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[6] * B[59]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[58]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[57]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[56]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[55]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[11] * B[54]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[53]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[52]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[51]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[50]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[49]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[48]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[47]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[46]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[45]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[44]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + 
"# A[22] * B[43]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[42]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[41]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[40]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[39]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[38]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[37]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[36]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[35]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[34]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[33]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[33] * B[32]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[31]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[30]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[29]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[28]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[27]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[26]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[25]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[24]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[23]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[22]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[44] * B[21]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[20]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[19]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[18]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[17]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[16]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[15]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[14]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[13]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[12]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[11]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" 
+ "adc r4, r4, r10\n\t" + "# A[55] * B[10]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[9]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[8]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[7]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[6]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[5]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[4]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[3]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[2]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #8]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[3] * B[63]\n\t" + "ldr r8, [%[a], #12]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[4] * B[62]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[5] * B[61]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[6] * B[60]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[7] * B[59]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[58]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[57]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[56]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[55]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[54]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[53]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[52]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[51]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[50]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[49]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[48]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[47]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[46]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[45]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[44]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[43]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[42]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[41]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[40]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[39]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[38]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[37]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[36]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[35]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[34]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[33]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[32]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[31]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[30]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[29]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[28]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[27]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[26]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[25]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[24]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[23]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[22]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[21]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[20]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[19]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[18]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[17]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[16]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[15]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[14]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[13]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[12]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[11]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[10]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[9]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[8]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[7]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[6]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[5]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[4]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[3]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #12]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[4] * B[63]\n\t" + "ldr r8, [%[a], #16]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[5] * B[62]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[6] * B[61]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[7] * B[60]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[8] * B[59]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[58]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[57]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], 
#228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[56]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[55]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[54]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[53]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[52]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[51]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[50]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[49]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[48]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[47]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[46]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, 
[%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[45]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[44]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[43]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[42]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[41]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[40]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[39]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[38]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[37]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[36]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[35]\n\t" + "ldr r8, [%[a], #128]\n\t" 
+ "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[34]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[33]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[32]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[31]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[30]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[29]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[28]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[27]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[26]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[25]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[24]\n\t" + "ldr r8, 
[%[a], #172]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[23]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[22]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[21]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[20]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[19]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[18]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[17]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[16]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[15]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[14]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[13]\n\t" + "ldr 
r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[12]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[11]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[10]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[9]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[8]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[7]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[6]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[5]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[4]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #16]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[5] * B[63]\n\t" + "ldr r8, [%[a], #20]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# 
A[6] * B[62]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[7] * B[61]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[8] * B[60]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[9] * B[59]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[10] * B[58]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[57]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[56]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[55]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[54]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[53]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[52]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# 
A[17] * B[51]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[50]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[49]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[48]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[47]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[46]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[45]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[44]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[43]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[42]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[41]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" 
+ "# A[28] * B[40]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[39]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[38]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[37]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[36]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[35]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[34]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[33]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[32]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[31]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[30]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, r10\n\t" + "# A[39] * B[29]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[28]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[27]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[26]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[25]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[24]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[23]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[22]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[21]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[20]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[19]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[50] * B[18]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[17]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[16]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[15]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[14]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[13]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[12]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[11]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[10]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[9]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[8]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[61] * B[7]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[6]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[5]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #20]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[6] * B[63]\n\t" + "ldr r8, [%[a], #24]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[7] * B[62]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[8] * B[61]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[9] * B[60]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[10] * B[59]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[58]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[57]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[56]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" 
+ "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[55]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[54]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[53]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[52]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[51]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[50]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[49]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[48]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[47]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[46]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[45]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[44]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[43]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[42]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[41]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[40]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[39]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[38]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[37]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[36]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[35]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[34]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[33]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[32]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[31]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[30]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[29]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[28]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[27]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[26]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[25]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[24]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[23]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[22]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[21]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[20]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[19]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[18]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[17]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[16]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[15]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[14]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[13]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[12]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[11]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[10]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[9]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[8]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[7]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[6]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #24]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[7] * B[63]\n\t" + "ldr r8, [%[a], #28]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[8] * B[62]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[9] * B[61]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[10] * B[60]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[11] * B[59]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, 
[%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[58]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[57]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[56]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[55]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[54]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[53]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[52]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[51]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[50]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[49]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[48]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr 
r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[47]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[46]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[45]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[44]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[43]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[42]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[41]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[40]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[39]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[38]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[37]\n\t" + "ldr r8, [%[a], 
#132]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[36]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[35]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[34]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[33]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[32]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[31]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[30]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[29]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[28]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[27]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[26]\n\t" + 
"ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[25]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[24]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[23]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[22]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[21]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[20]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[19]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[18]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[17]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[16]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * 
B[15]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[14]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[13]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[12]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[11]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[10]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[9]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[8]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[7]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #28]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[8] * B[63]\n\t" + "ldr r8, [%[a], #32]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[9] * B[62]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[10] * B[61]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[11] * B[60]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[12] * B[59]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[58]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[57]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[56]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[55]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[54]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[53]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[52]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[51]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[21] * B[50]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[49]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[48]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[47]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[46]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[45]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[44]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[43]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[42]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[41]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[40]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[39]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[38]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[37]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[36]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[35]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[34]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[33]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[32]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[31]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[30]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[29]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[28]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[27]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[26]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[25]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[24]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[23]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[22]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[21]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[20]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[19]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[18]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[17]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[16]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[15]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[14]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[13]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[12]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[11]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[10]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[9]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[8]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #32]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[9] * B[63]\n\t" + "ldr r8, [%[a], #36]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[10] * B[62]\n\t" + "ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[11] * B[61]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[12] * B[60]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[13] * B[59]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[58]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[57]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[56]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[55]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[54]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[53]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[52]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[51]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[50]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[49]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[48]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[47]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[46]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[45]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[44]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[43]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[42]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[41]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #164]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[40]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[39]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[38]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[37]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[36]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[35]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[34]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[33]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[32]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[31]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[30]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], 
#120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[29]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[28]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[27]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[26]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[25]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[24]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[23]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[22]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[21]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[20]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[19]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr 
r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[18]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[17]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[16]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[15]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[14]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[13]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[12]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[11]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[10]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[9]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #36]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #288]\n\t" + "# A[10] * B[63]\n\t" + 
"ldr r8, [%[a], #40]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[11] * B[62]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[12] * B[61]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[13] * B[60]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[14] * B[59]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[58]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[57]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[56]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[55]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[54]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[53]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * 
B[52]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[51]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[50]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[49]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[48]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[47]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[46]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[45]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[44]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[43]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[42]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + 
"# A[32] * B[41]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[40]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[39]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[38]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[37]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[36]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[35]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[34]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[33]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[32]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[31]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, r10\n\t" + "# A[43] * B[30]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[29]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[28]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[27]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[26]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[25]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[24]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[23]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[22]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[21]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[20]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[54] * B[19]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[18]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[17]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[16]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[15]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[14]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[13]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[12]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[11]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[10]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #40]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #292]\n\t" + "# A[11] * B[63]\n\t" + "ldr r8, [%[a], #44]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[12] * B[62]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[13] * B[61]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[14] * B[60]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[15] * B[59]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[58]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[57]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[56]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[55]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[54]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[53]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[52]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, 
r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[51]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[50]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[49]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[48]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[47]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[46]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[45]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[44]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[43]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[42]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[41]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[40]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[39]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[38]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[37]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[36]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[35]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[34]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[33]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[32]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[31]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[30]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[29]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[28]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[27]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[26]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[25]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[24]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[23]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[22]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[21]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[20]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[19]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #76]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[18]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[17]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[16]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[15]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[14]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[13]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[12]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[11]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #44]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[12] * B[63]\n\t" + "ldr r8, [%[a], #48]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[13] * B[62]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[14] * B[61]\n\t" + "ldr r8, [%[a], 
#56]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[15] * B[60]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[16] * B[59]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[17] * B[58]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[57]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[56]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[55]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[54]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[53]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[52]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[51]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[50]\n\t" + "ldr r8, 
[%[a], #100]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[49]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[48]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[47]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[46]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[45]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[44]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[43]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[42]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[41]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[40]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * 
B[39]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[38]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[37]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[36]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[35]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[34]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[33]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[32]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[31]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[30]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[29]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" 
+ "# A[47] * B[28]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[27]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[26]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[25]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[24]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[23]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[22]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[21]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[20]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[19]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[18]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
r10\n\t" + "# A[58] * B[17]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[16]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[15]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[14]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[13]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[12]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #48]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #300]\n\t" + "# A[13] * B[63]\n\t" + "ldr r8, [%[a], #52]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[14] * B[62]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[15] * B[61]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[16] * B[60]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[17] * B[59]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[58]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[57]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[56]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[55]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[54]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[53]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[52]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[51]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[50]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[49]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[48]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[47]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[46]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[45]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[44]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[43]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[42]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[41]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[40]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[39]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[38]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[37]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[36]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[35]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[34]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[33]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[32]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[31]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[30]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[29]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[28]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[27]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[26]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[25]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[24]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[23]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[22]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[21]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[20]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[19]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[18]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[17]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[16]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[15]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[14]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[13]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #52]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[14] * B[63]\n\t" + "ldr r8, [%[a], #56]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[15] * B[62]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[16] * B[61]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[17] * B[60]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[18] * B[59]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[58]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[20] * B[57]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[56]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[55]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr 
r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[54]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[53]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[52]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[51]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[50]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[49]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[48]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[47]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[46]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[45]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[44]\n\t" + "ldr r8, [%[a], 
#132]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[43]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[42]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[41]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[40]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[39]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[38]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[37]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[36]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[35]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[34]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[33]\n\t" + 
"ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[32]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[31]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[30]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[29]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[28]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[27]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[26]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[25]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[24]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[23]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * 
B[22]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[21]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[20]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[19]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[18]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[17]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[16]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[15]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[14]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #56]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[15] * B[63]\n\t" + "ldr r8, [%[a], #60]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[16] * B[62]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, r10\n\t" + "# A[17] * B[61]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[18] * B[60]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[19] * B[59]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[58]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[57]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[56]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[55]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[54]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[53]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[52]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[51]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[50]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[49]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[48]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[47]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[46]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[45]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[44]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[43]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[42]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[41]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[40]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[39]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[38]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[37]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[36]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[35]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[34]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[33]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[32]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[31]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[30]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[29]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, 
r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[28]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[27]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[26]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[25]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[24]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[23]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[22]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[21]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[20]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[19]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[18]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[17]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[16]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[15]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #60]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[16] * B[63]\n\t" + "ldr r8, [%[a], #64]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[17] * B[62]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[18] * B[61]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[19] * B[60]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[20] * B[59]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[58]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[57]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[56]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], 
#224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[55]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[54]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[53]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[52]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[51]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[50]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[49]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[48]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[47]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[46]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[45]\n\t" + "ldr r8, [%[a], #136]\n\t" + 
"ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[44]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[43]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[42]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[41]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[40]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[39]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[38]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[37]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[36]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[35]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[34]\n\t" + "ldr r8, [%[a], 
#180]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[33]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[32]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[31]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[30]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[29]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[28]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[27]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[26]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[25]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[24]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[23]\n\t" + 
"ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[22]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[21]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[20]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[19]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[18]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[17]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[16]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #64]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[17] * B[63]\n\t" + "ldr r8, [%[a], #68]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[18] * B[62]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[19] * B[61]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[20] * B[60]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[21] * B[59]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[58]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[57]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[56]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[55]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[54]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[53]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[52]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[51]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[50]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[31] * B[49]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[48]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[47]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[46]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[45]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[44]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[43]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[42]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[41]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[40]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[39]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[38]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[37]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[36]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[35]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[34]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[33]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[32]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[31]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[30]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[29]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[28]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[27]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[26]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[25]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[24]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[23]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[22]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[21]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[20]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[19]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[18]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[17]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #68]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[18] * B[63]\n\t" + "ldr r8, [%[a], #72]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[19] * B[62]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[20] * B[61]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[21] * B[60]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[22] * B[59]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[58]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[57]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[56]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[55]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[54]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[53]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[52]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[51]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[50]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[49]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[48]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[47]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[46]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[45]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[44]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[43]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[42]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], 
#168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[41]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[40]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[39]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[38]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[37]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[36]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[35]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[34]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[33]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[32]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[31]\n\t" + "ldr r8, [%[a], #200]\n\t" + 
"ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[30]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[29]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[28]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[27]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[26]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[25]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[24]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[23]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[22]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[21]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[20]\n\t" + "ldr r8, [%[a], 
#244]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[19]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[18]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #72]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[19] * B[63]\n\t" + "ldr r8, [%[a], #76]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[20] * B[62]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[21] * B[61]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[22] * B[60]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[23] * B[59]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[58]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[57]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[56]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# 
A[27] * B[55]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[54]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[53]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[52]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[51]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[50]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[49]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[48]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[47]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[46]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[45]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[38] * B[44]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[43]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[42]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[41]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[40]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[39]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[38]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[37]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[36]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[35]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[34]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[49] * B[33]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[32]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[31]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[30]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[29]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[28]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[27]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[26]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[25]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[24]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[23]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[22]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[21]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[20]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[19]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #76]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[20] * B[63]\n\t" + "ldr r8, [%[a], #80]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[21] * B[62]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[22] * B[61]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[23] * B[60]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[24] * B[59]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[58]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[57]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[56]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[55]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[54]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[53]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[52]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[51]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[50]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[49]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[48]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[47]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[46]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[45]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[44]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[43]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[42]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[41]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[40]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[39]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[38]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[37]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[36]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[35]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #140]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[34]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[33]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[32]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[31]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[30]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[29]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[28]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[27]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[26]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[25]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[24]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], 
#96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[23]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[22]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[21]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[20]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #80]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[21] * B[63]\n\t" + "ldr r8, [%[a], #84]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[22] * B[62]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[23] * B[61]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[24] * B[60]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[25] * B[59]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[58]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[57]\n\t" + "ldr r8, 
[%[a], #108]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[56]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[55]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[54]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[53]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[52]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[51]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[50]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[49]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[48]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[47]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * 
B[46]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[45]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[44]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[43]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[42]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[41]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[40]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[39]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[38]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[37]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[36]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" 
+ "# A[49] * B[35]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[34]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[33]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[32]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[31]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[30]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[29]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[28]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[27]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[26]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[25]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, 
r5, r10\n\t" + "# A[60] * B[24]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[23]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[22]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[21]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #84]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[22] * B[63]\n\t" + "ldr r8, [%[a], #88]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[23] * B[62]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[24] * B[61]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[25] * B[60]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[26] * B[59]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[58]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[57]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[56]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[55]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[54]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[53]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[52]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[51]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[50]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[49]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[48]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[47]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[46]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[45]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[44]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[43]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[42]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[41]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[40]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[39]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[38]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[37]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[36]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[35]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[34]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[33]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[32]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[31]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[30]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[29]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[28]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[27]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[26]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[25]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[24]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #96]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[23]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[22]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #88]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[23] * B[63]\n\t" + "ldr r8, [%[a], #92]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[24] * B[62]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[25] * B[61]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[26] * B[60]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[27] * B[59]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[58]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[57]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[56]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[55]\n\t" + "ldr r8, [%[a], 
#124]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[54]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[53]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[52]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[51]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[50]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[49]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[48]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[47]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[46]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[45]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[44]\n\t" + 
"ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[43]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[42]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[41]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[40]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[39]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[38]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[37]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[36]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[35]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[34]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * 
B[33]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[32]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[31]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[30]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[29]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[28]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[27]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[26]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[25]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[24]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[23]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #92]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + 
"str r5, [%[r], #344]\n\t" + "# A[24] * B[63]\n\t" + "ldr r8, [%[a], #96]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[25] * B[62]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[26] * B[61]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[27] * B[60]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[28] * B[59]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[58]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[57]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[56]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[55]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[54]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[53]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[52]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[51]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[50]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[49]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[48]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[47]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[46]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[45]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[44]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[43]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[42]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[41]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[40]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[39]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[38]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[37]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[36]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[35]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[34]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[33]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[32]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[31]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[30]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[29]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[28]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[27]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[26]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[25]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[24]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #96]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[25] * B[63]\n\t" + "ldr r8, [%[a], #100]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[26] * B[62]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[27] * B[61]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[28] * B[60]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], 
#240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[29] * B[59]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[58]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[57]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[56]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[55]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[54]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[53]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[52]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[51]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[50]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[49]\n\t" + "ldr r8, [%[a], #156]\n\t" + 
"ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[48]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[47]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[46]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[45]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[44]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[43]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[42]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[41]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[40]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[39]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[38]\n\t" + "ldr r8, [%[a], 
#200]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[37]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[36]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[35]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[34]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[33]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[32]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[31]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[30]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[29]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[28]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[27]\n\t" + 
"ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[26]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[25]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #100]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[26] * B[63]\n\t" + "ldr r8, [%[a], #104]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[27] * B[62]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[28] * B[61]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[29] * B[60]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[30] * B[59]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[58]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[57]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[56]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[34] * B[55]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[54]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[53]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[52]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[51]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[50]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[49]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[48]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[47]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[46]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[45]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[44]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[43]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[42]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[41]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[40]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[39]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[38]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[37]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[36]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[35]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[34]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[33]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[32]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[31]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[30]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[29]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[28]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[27]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[26]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #104]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[27] * B[63]\n\t" + "ldr r8, [%[a], #108]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[28] * B[62]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[29] * B[61]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[30] * B[60]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[31] * B[59]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[58]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[57]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[56]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[55]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[54]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[53]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[52]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[51]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[50]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], 
#200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[49]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[48]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[47]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[46]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[45]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[44]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[43]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[42]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[41]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[40]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[39]\n\t" + "ldr r8, [%[a], #204]\n\t" + 
"ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[38]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[37]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[36]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[35]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[34]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[33]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[32]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[31]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[30]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[29]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[28]\n\t" + "ldr r8, [%[a], 
#248]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[27]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #108]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[28] * B[63]\n\t" + "ldr r8, [%[a], #112]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[29] * B[62]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[30] * B[61]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[31] * B[60]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[32] * B[59]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[58]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[57]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[56]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[55]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[37] * B[54]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[53]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[52]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[51]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[50]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[49]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[48]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[47]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[46]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[45]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[44]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, r10\n\t" + "# A[48] * B[43]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[42]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[41]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[40]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[39]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[38]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[37]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[36]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[35]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[34]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[33]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[32]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[31]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[30]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[29]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[28]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #112]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[29] * B[63]\n\t" + "ldr r8, [%[a], #116]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[30] * B[62]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[31] * B[61]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[32] * B[60]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[33] * B[59]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[58]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[57]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[56]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[55]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[54]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[53]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[52]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[51]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[50]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[49]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[48]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[47]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #188]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[46]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[45]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[44]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[43]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[42]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[41]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[40]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[39]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[38]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[37]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[36]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], 
#144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[35]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[34]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[33]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[32]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[31]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[30]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[29]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #116]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[30] * B[63]\n\t" + "ldr r8, [%[a], #120]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[31] * B[62]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[32] * B[61]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[33] * B[60]\n\t" + 
"ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[34] * B[59]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[35] * B[58]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[57]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[56]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[55]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[54]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[53]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[52]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[51]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[50]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * 
B[49]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[48]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[47]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[46]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[45]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[44]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[43]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[42]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[41]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[40]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[39]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" 
+ "# A[55] * B[38]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[37]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[36]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[35]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[34]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[33]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[32]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[31]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[30]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #120]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[31] * B[63]\n\t" + "ldr r8, [%[a], #124]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[32] * B[62]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[33] * B[61]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[34] * B[60]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[35] * B[59]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[58]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[57]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[56]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[55]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[54]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[53]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[52]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[51]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, 
r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[50]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[49]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[48]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[47]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[46]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[45]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[44]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[43]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[42]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[41]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[40]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[39]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[38]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[37]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[36]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[35]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[34]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[33]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[32]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[31]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #124]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[32] * B[63]\n\t" + "ldr r8, [%[a], #128]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[33] * B[62]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr 
r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[34] * B[61]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[35] * B[60]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[36] * B[59]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[58]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[57]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[56]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[55]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[54]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[53]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[52]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[51]\n\t" + "ldr r8, [%[a], 
#176]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[50]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[49]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[48]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[47]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[46]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[45]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[44]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[43]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[42]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[41]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[40]\n\t" + 
"ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[39]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[38]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[37]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[36]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[35]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[34]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[33]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[32]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #128]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #380]\n\t" + "# A[33] * B[63]\n\t" + "ldr r8, [%[a], #132]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[34] * B[62]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc 
r5, r5, r10\n\t" + "# A[35] * B[61]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[36] * B[60]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[37] * B[59]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[58]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[57]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[56]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[55]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[54]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[53]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[52]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[51]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[50]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[49]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[48]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[47]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[46]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[45]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[44]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[43]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[42]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[41]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[40]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[39]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[38]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[37]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[36]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[35]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[34]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[33]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #132]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #384]\n\t" + "# A[34] * B[63]\n\t" + "ldr r8, [%[a], #136]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[35] * B[62]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[36] * B[61]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[37] * B[60]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull 
r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[38] * B[59]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[58]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[57]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[56]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[55]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[54]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[53]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[52]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[51]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[50]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[49]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], 
#196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[48]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[47]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[46]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[45]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[44]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[43]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[42]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[41]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[40]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[39]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[38]\n\t" + "ldr r8, [%[a], #236]\n\t" + 
"ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[37]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[36]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[35]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[34]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #136]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #388]\n\t" + "# A[35] * B[63]\n\t" + "ldr r8, [%[a], #140]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[36] * B[62]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[37] * B[61]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[38] * B[60]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[39] * B[59]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[58]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] 
* B[57]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[56]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[55]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[54]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[53]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[52]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[51]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[50]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[49]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[48]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[47]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
r10\n\t" + "# A[52] * B[46]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[45]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[44]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[43]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[42]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[41]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[40]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[39]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[38]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[37]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[36]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, r10\n\t" + "# A[63] * B[35]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #140]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #392]\n\t" + "# A[36] * B[63]\n\t" + "ldr r8, [%[a], #144]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[37] * B[62]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[38] * B[61]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[39] * B[60]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[40] * B[59]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[58]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[57]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[56]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[55]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[54]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[53]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[52]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[51]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[50]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[49]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[48]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[47]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[46]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[45]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[44]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[43]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, 
r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[42]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[41]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[40]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[39]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[38]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[37]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[36]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #144]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #396]\n\t" + "# A[37] * B[63]\n\t" + "ldr r8, [%[a], #148]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[38] * B[62]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[39] * B[61]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[40] * B[60]\n\t" + "ldr r8, [%[a], #160]\n\t" + 
"ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[41] * B[59]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[58]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[57]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[56]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[55]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[54]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[53]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[52]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[51]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[50]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[49]\n\t" + "ldr r8, [%[a], 
#204]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[48]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[47]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[46]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[45]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[44]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[43]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[42]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[41]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[40]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[39]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[38]\n\t" + 
"ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[37]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #148]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #400]\n\t" + "# A[38] * B[63]\n\t" + "ldr r8, [%[a], #152]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[39] * B[62]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[40] * B[61]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[41] * B[60]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[42] * B[59]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[58]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[57]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[56]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[55]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r4, r10\n\t" + "# A[47] * B[54]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[53]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[52]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[51]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[50]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[49]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[48]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[47]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[46]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[45]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[44]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[43]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[42]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[41]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[40]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[39]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[38]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #152]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #404]\n\t" + "# A[39] * B[63]\n\t" + "ldr r8, [%[a], #156]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[40] * B[62]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[41] * B[61]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[42] * B[60]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[43] * B[59]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[58]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[57]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[56]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[55]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[54]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[53]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[52]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[51]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[50]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[49]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[48]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #192]\n\t" + 
"umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[47]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[46]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[45]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[44]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[43]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[42]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[41]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[40]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[39]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #156]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #408]\n\t" + "# A[40] * B[63]\n\t" + "ldr r8, [%[a], #160]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[41] * B[62]\n\t" + "ldr r8, 
[%[a], #164]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[42] * B[61]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[43] * B[60]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[44] * B[59]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[58]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[57]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[56]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[55]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[54]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[53]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[52]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * 
B[51]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[50]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[49]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[48]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[47]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[46]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[45]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[44]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[43]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[42]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[41]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
+ "# A[63] * B[40]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #160]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #412]\n\t" + "# A[41] * B[63]\n\t" + "ldr r8, [%[a], #164]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[42] * B[62]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[43] * B[61]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[44] * B[60]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[45] * B[59]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[58]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[57]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[56]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[55]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[54]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[53]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[52]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[51]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[50]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[49]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[48]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[47]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[46]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[45]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[44]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[43]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, 
r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[42]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[41]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #164]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #416]\n\t" + "# A[42] * B[63]\n\t" + "ldr r8, [%[a], #168]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[43] * B[62]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[44] * B[61]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[45] * B[60]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[46] * B[59]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[58]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[57]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[56]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[55]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], 
#220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[54]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[53]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[52]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[51]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[50]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[49]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[48]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[47]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[46]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[45]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[44]\n\t" + "ldr r8, [%[a], #244]\n\t" + 
"ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[43]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[42]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #168]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #420]\n\t" + "# A[43] * B[63]\n\t" + "ldr r8, [%[a], #172]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[44] * B[62]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[45] * B[61]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[46] * B[60]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[47] * B[59]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * B[58]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[57]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[56]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] 
* B[55]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[54]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[53]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[52]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[51]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[50]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[49]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[48]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[47]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[46]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[45]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
r10\n\t" + "# A[62] * B[44]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[43]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #172]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #424]\n\t" + "# A[44] * B[63]\n\t" + "ldr r8, [%[a], #176]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[45] * B[62]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[46] * B[61]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[47] * B[60]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[48] * B[59]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[58]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[57]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[56]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[55]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[54]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[53]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[52]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[51]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[50]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[49]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[48]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[47]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[46]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[45]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[44]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #176]\n\t" + "umull r6, r7, r8, r9\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #428]\n\t" + "# A[45] * B[63]\n\t" + "ldr r8, [%[a], #180]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[46] * B[62]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[47] * B[61]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[48] * B[60]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[49] * B[59]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[58]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[57]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[56]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[55]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[54]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[53]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, 
[%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[52]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[51]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[50]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[49]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[48]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[47]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[46]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[45]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #180]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #432]\n\t" + "# A[46] * B[63]\n\t" + "ldr r8, [%[a], #184]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[47] * B[62]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[48] * 
B[61]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[49] * B[60]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[50] * B[59]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[58]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[57]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[56]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[55]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[54]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[53]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[52]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[51]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" 
+ "# A[59] * B[50]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[49]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[48]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[47]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[46]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #184]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #436]\n\t" + "# A[47] * B[63]\n\t" + "ldr r8, [%[a], #188]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[48] * B[62]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[49] * B[61]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[50] * B[60]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[51] * B[59]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[58]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[57]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[56]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[55]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[54]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[53]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[52]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[51]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[50]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[49]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[48]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[47]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #188]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, 
r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #440]\n\t" + "# A[48] * B[63]\n\t" + "ldr r8, [%[a], #192]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[49] * B[62]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[50] * B[61]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[51] * B[60]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[52] * B[59]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[58]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[57]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[56]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[55]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[54]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[53]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], 
#212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[52]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[51]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[50]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[49]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[48]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #192]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #444]\n\t" + "# A[49] * B[63]\n\t" + "ldr r8, [%[a], #196]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[50] * B[62]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[51] * B[61]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[52] * B[60]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[53] * B[59]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[58]\n\t" + 
"ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[57]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[56]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[55]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[54]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[53]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[52]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[51]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[50]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[49]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #196]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #448]\n\t" + "# A[50] * B[63]\n\t" + "ldr r8, [%[a], #200]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc 
r4, r10, r10\n\t" + "# A[51] * B[62]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[52] * B[61]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[53] * B[60]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[54] * B[59]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[58]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[57]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[56]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[55]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[54]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[53]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[52]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[51]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[50]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #200]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #452]\n\t" + "# A[51] * B[63]\n\t" + "ldr r8, [%[a], #204]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[52] * B[62]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[53] * B[61]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[54] * B[60]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[55] * B[59]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[58]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[57]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[56]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[55]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[54]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[53]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[52]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[51]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #204]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #456]\n\t" + "# A[52] * B[63]\n\t" + "ldr r8, [%[a], #208]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[53] * B[62]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[54] * B[61]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[55] * B[60]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[56] * B[59]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[58]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[57]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr 
r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[56]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[55]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[54]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[53]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[52]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #208]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #460]\n\t" + "# A[53] * B[63]\n\t" + "ldr r8, [%[a], #212]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[54] * B[62]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[55] * B[61]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[56] * B[60]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[57] * B[59]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * 
B[58]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[57]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[56]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[55]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[54]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[53]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #212]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #464]\n\t" + "# A[54] * B[63]\n\t" + "ldr r8, [%[a], #216]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[55] * B[62]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[56] * B[61]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[57] * B[60]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[58] * B[59]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[58]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[57]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[56]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[55]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[54]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #216]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #468]\n\t" + "# A[55] * B[63]\n\t" + "ldr r8, [%[a], #220]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[56] * B[62]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[57] * B[61]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[58] * B[60]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[59] * B[59]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[58]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, 
r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[61] * B[57]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[56]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[55]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #220]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #472]\n\t" + "# A[56] * B[63]\n\t" + "ldr r8, [%[a], #224]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[57] * B[62]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[58] * B[61]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[59] * B[60]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[60] * B[59]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[58]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[57]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[56]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr 
r9, [%[b], #224]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #476]\n\t" + "# A[57] * B[63]\n\t" + "ldr r8, [%[a], #228]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[58] * B[62]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[59] * B[61]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[60] * B[60]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[61] * B[59]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[58]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[57]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #228]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #480]\n\t" + "# A[58] * B[63]\n\t" + "ldr r8, [%[a], #232]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[59] * B[62]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[60] * B[61]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, r10\n\t" + "# A[61] * B[60]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[62] * B[59]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[58]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #232]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #484]\n\t" + "# A[59] * B[63]\n\t" + "ldr r8, [%[a], #236]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[60] * B[62]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[61] * B[61]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[62] * B[60]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "# A[63] * B[59]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #236]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #488]\n\t" + "# A[60] * B[63]\n\t" + "ldr r8, [%[a], #240]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r10, r10\n\t" + "# A[61] * B[62]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[62] * B[61]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, 
r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "# A[63] * B[60]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #240]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #492]\n\t" + "# A[61] * B[63]\n\t" + "ldr r8, [%[a], #244]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r10, r10\n\t" + "# A[62] * B[62]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "# A[63] * B[61]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #244]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #496]\n\t" + "# A[62] * B[63]\n\t" + "ldr r8, [%[a], #248]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r10, r10\n\t" + "# A[63] * B[62]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #248]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #500]\n\t" + "# A[63] * B[63]\n\t" + "ldr r8, [%[a], #252]\n\t" + "ldr r9, [%[b], #252]\n\t" + "umull r6, r7, r8, r9\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r3, [%[r], #504]\n\t" + "str r4, [%[r], #508]\n\t" + "ldr r3, [sp, #0]\n\t" + "ldr r4, [sp, #4]\n\t" + "ldr r5, [sp, #8]\n\t" + "ldr r6, [sp, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [sp, #16]\n\t" + "ldr r4, [sp, #20]\n\t" + "ldr r5, [sp, #24]\n\t" + "ldr r6, [sp, #28]\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [sp, #32]\n\t" + "ldr r4, [sp, #36]\n\t" + "ldr r5, [sp, #40]\n\t" + 
"ldr r6, [sp, #44]\n\t" + "str r3, [%[r], #32]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r3, [sp, #48]\n\t" + "ldr r4, [sp, #52]\n\t" + "ldr r5, [sp, #56]\n\t" + "ldr r6, [sp, #60]\n\t" + "str r3, [%[r], #48]\n\t" + "str r4, [%[r], #52]\n\t" + "str r5, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r3, [sp, #64]\n\t" + "ldr r4, [sp, #68]\n\t" + "ldr r5, [sp, #72]\n\t" + "ldr r6, [sp, #76]\n\t" + "str r3, [%[r], #64]\n\t" + "str r4, [%[r], #68]\n\t" + "str r5, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r3, [sp, #80]\n\t" + "ldr r4, [sp, #84]\n\t" + "ldr r5, [sp, #88]\n\t" + "ldr r6, [sp, #92]\n\t" + "str r3, [%[r], #80]\n\t" + "str r4, [%[r], #84]\n\t" + "str r5, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r3, [sp, #96]\n\t" + "ldr r4, [sp, #100]\n\t" + "ldr r5, [sp, #104]\n\t" + "ldr r6, [sp, #108]\n\t" + "str r3, [%[r], #96]\n\t" + "str r4, [%[r], #100]\n\t" + "str r5, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r3, [sp, #112]\n\t" + "ldr r4, [sp, #116]\n\t" + "ldr r5, [sp, #120]\n\t" + "ldr r6, [sp, #124]\n\t" + "str r3, [%[r], #112]\n\t" + "str r4, [%[r], #116]\n\t" + "str r5, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r3, [sp, #128]\n\t" + "ldr r4, [sp, #132]\n\t" + "ldr r5, [sp, #136]\n\t" + "ldr r6, [sp, #140]\n\t" + "str r3, [%[r], #128]\n\t" + "str r4, [%[r], #132]\n\t" + "str r5, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r3, [sp, #144]\n\t" + "ldr r4, [sp, #148]\n\t" + "ldr r5, [sp, #152]\n\t" + "ldr r6, [sp, #156]\n\t" + "str r3, [%[r], #144]\n\t" + "str r4, [%[r], #148]\n\t" + "str r5, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r3, [sp, #160]\n\t" + "ldr r4, [sp, #164]\n\t" + "ldr r5, [sp, #168]\n\t" + "ldr r6, [sp, #172]\n\t" + "str r3, [%[r], #160]\n\t" + "str r4, [%[r], #164]\n\t" + "str r5, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r3, [sp, #176]\n\t" + "ldr r4, [sp, #180]\n\t" + "ldr r5, [sp, #184]\n\t" + "ldr r6, [sp, 
#188]\n\t" + "str r3, [%[r], #176]\n\t" + "str r4, [%[r], #180]\n\t" + "str r5, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r3, [sp, #192]\n\t" + "ldr r4, [sp, #196]\n\t" + "ldr r5, [sp, #200]\n\t" + "ldr r6, [sp, #204]\n\t" + "str r3, [%[r], #192]\n\t" + "str r4, [%[r], #196]\n\t" + "str r5, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r3, [sp, #208]\n\t" + "ldr r4, [sp, #212]\n\t" + "ldr r5, [sp, #216]\n\t" + "ldr r6, [sp, #220]\n\t" + "str r3, [%[r], #208]\n\t" + "str r4, [%[r], #212]\n\t" + "str r5, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r3, [sp, #224]\n\t" + "ldr r4, [sp, #228]\n\t" + "ldr r5, [sp, #232]\n\t" + "ldr r6, [sp, #236]\n\t" + "str r3, [%[r], #224]\n\t" + "str r4, [%[r], #228]\n\t" + "str r5, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r3, [sp, #240]\n\t" + "ldr r4, [sp, #244]\n\t" + "ldr r5, [sp, #248]\n\t" + "ldr r6, [sp, #252]\n\t" + "str r3, [%[r], #240]\n\t" + "str r4, [%[r], #244]\n\t" + "str r5, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "add sp, sp, #256\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit b1[64]; + sp_digit z2[128]; + sp_digit u, ca, cb; + + ca = sp_2048_add_64(a1, a, &a[64]); + cb = sp_2048_add_64(b1, b, &b[64]); + u = ca & cb; + sp_2048_mul_64(z1, a1, b1); + sp_2048_mul_64(z2, &a[64], &b[64]); + sp_2048_mul_64(z0, a, b); + sp_2048_mask_64(r + 128, a1, 0 - cb); + sp_2048_mask_64(b1, b1, 0 - ca); + u += sp_2048_add_64(r + 128, r + 128, b1); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "sub sp, sp, #256\n\t" + "mov r14, #0\n\t" + "# A[0] * A[0]\n\t" + "ldr r10, [%[a], #0]\n\t" + "umull r8, r3, r10, r10\n\t" + "mov r4, #0\n\t" + "str r8, [sp]\n\t" + "# A[0] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #4]\n\t" + "# A[0] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[1] * A[1]\n\t" + "ldr r10, [%[a], #4]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [sp, #8]\n\t" + "# A[0] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + 
"adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[1] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [sp, #12]\n\t" + "# A[0] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[1] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[2] * A[2]\n\t" + "ldr r10, [%[a], #8]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [sp, #16]\n\t" + "# A[0] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #20]\n\t" + "# A[0] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[3]\n\t" + "ldr r10, [%[a], #12]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #24]\n\t" + "# A[0] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #28]\n\t" + "# A[0] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + 
"ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[4]\n\t" + "ldr r10, [%[a], #16]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #32]\n\t" + "# A[0] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #36]\n\t" + "# A[0] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[3] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[5]\n\t" + "ldr r10, [%[a], #20]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #40]\n\t" + "# A[0] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #44]\n\t" + "# A[0] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, 
r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[6]\n\t" + "ldr r10, [%[a], #24]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #48]\n\t" + "# A[0] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[9]\n\t" + "ldr r10, 
[%[a], #36]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #52]\n\t" + "# A[0] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[7]\n\t" + "ldr r10, [%[a], #28]\n\t" + "umull r8, r9, r10, r10\n\t" + 
"adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #56]\n\t" + "# A[0] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #60]\n\t" + "# A[0] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[15]\n\t" + "ldr 
r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[8]\n\t" + "ldr r10, [%[a], #32]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #64]\n\t" + "# A[0] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #68]\n\t" + "# A[0] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[4] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[9]\n\t" + "ldr r10, [%[a], #36]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #72]\n\t" + "# A[0] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + 
"ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #76]\n\t" + "# A[0] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[10]\n\t" + "ldr r10, [%[a], #40]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #80]\n\t" + "# A[0] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[4] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #84]\n\t" + "# A[0] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[19]\n\t" + "ldr r10, 
[%[a], #76]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[11]\n\t" + "ldr r10, [%[a], #44]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #88]\n\t" + "# A[0] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + 
"adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #92]\n\t" + "# A[0] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[12]\n\t" + "ldr r10, [%[a], #48]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #96]\n\t" + "# A[0] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[17]\n\t" + "ldr 
r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #100]\n\t" + "# A[0] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], 
#20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[13]\n\t" + "ldr r10, [%[a], #52]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #104]\n\t" + "# A[0] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #108]\n\t" + "# A[0] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, 
r7, r14\n\t" + "# A[9] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[14]\n\t" + "ldr r10, [%[a], #56]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #112]\n\t" + "# A[0] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" 
+ "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, 
r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #116]\n\t" + "# A[0] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #40]\n\t" 
+ "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[15]\n\t" + "ldr r10, [%[a], #60]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #120]\n\t" + "# A[0] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #124]\n\t" + "# A[0] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[10] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[16]\n\t" + "ldr r10, [%[a], #64]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #128]\n\t" + "# A[0] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[30]\n\t" + "ldr r10, [%[a], 
#120]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[19]\n\t" + 
"ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #132]\n\t" + "# A[0] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, 
[%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[17]\n\t" + "ldr r10, [%[a], #68]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + 
"adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #136]\n\t" + "# A[0] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #140]\n\t" + "# A[0] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[18]\n\t" + "ldr r10, [%[a], #72]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #144]\n\t" + "# A[0] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" 
+ "# A[5] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[16] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #148]\n\t" + "# A[0] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * 
A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[18] * A[20]\n\t" + "ldr r10, [%[a], #80]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[19]\n\t" + "ldr r10, [%[a], #76]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #152]\n\t" + "# A[0] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + 
"ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[20]\n\t" + "ldr r10, 
[%[a], #80]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #156]\n\t" + "# A[0] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], 
#36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[20]\n\t" + "ldr r10, [%[a], 
#80]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #160]\n\t" + "# A[0] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "ldr r8, [%[a], #80]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #164]\n\t" + "# A[0] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[21]\n\t" + "ldr r10, [%[a], #84]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #168]\n\t" + "# A[0] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[9] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #172]\n\t" + "# A[0] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[8] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[22]\n\t" + "ldr r10, [%[a], #88]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #176]\n\t" + "# A[0] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[39]\n\t" + "ldr 
r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[17] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #180]\n\t" + "# A[0] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[42]\n\t" + "ldr r10, 
[%[a], #168]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * 
A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[23]\n\t" + "ldr r10, [%[a], #92]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #184]\n\t" + "# A[0] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, 
[%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[35]\n\t" + "ldr r10, [%[a], 
#140]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * 
A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #188]\n\t" + "# A[0] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[39]\n\t" + "ldr r10, [%[a], 
#156]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * 
A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[24]\n\t" + "ldr r10, [%[a], #96]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #192]\n\t" + "# A[0] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], 
#20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[33]\n\t" + "ldr r10, [%[a], 
#132]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #196]\n\t" + "# A[0] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, 
[%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[38]\n\t" + "ldr r10, [%[a], 
#152]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * 
A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[25]\n\t" + "ldr r10, [%[a], #100]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #200]\n\t" + "# A[0] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], 
#28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[33]\n\t" + "ldr r10, [%[a], 
#132]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #204]\n\t" + "# A[0] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], 
#8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + 
"ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[28]\n\t" + "ldr 
r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[26]\n\t" + "ldr r10, [%[a], #104]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #208]\n\t" + "# A[0] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, 
[%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #212]\n\t" + "# A[0] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], 
#48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[31]\n\t" + "ldr r10, [%[a], 
#124]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[27]\n\t" + "ldr r10, [%[a], #108]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #216]\n\t" + "# A[0] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], 
#64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[28]\n\t" + "ldr r10, [%[a], 
#112]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #220]\n\t" + "# A[0] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], 
#36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[36]\n\t" + "ldr r10, [%[a], 
#144]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[28]\n\t" + "ldr r10, [%[a], #112]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #224]\n\t" + "# A[0] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], 
#48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[34]\n\t" + "ldr r10, [%[a], 
#136]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #228]\n\t" + "# A[0] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], 
#16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[43]\n\t" + "ldr r10, [%[a], 
#172]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * 
A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[29]\n\t" + "ldr r10, [%[a], #116]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #232]\n\t" + "# A[0] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], 
#24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[42]\n\t" + "ldr r10, [%[a], 
#168]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * 
A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #236]\n\t" + "# A[0] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[52]\n\t" + "ldr r10, [%[a], 
#208]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * 
A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[30] * A[30]\n\t" + "ldr r10, [%[a], #120]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #240]\n\t" + "# A[0] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[52]\n\t" + "ldr r10, [%[a], 
#208]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * 
A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [sp, #244]\n\t" + "# A[0] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * 
A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, 
r7, r14\n\t" + "# A[21] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[31]\n\t" + "ldr r10, [%[a], #124]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc 
r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [sp, #248]\n\t" + "# A[0] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #0]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[1] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[2] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * 
A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, 
r7, r14\n\t" + "# A[21] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [sp, #252]\n\t" + "# A[1] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[2] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[3] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, 
r7, r14\n\t" + "# A[11] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[32]\n\t" + "ldr r10, [%[a], #128]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + 
"adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #256]\n\t" + "# A[2] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[3] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[4] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[12] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs 
r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #260]\n\t" + "# A[3] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[4] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[5] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[33]\n\t" + "ldr r10, [%[a], #132]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #264]\n\t" + "# A[4] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r5, r6, r10, r8\n\t" 
+ "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[5] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[6] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #268]\n\t" + "# A[5] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[6] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[7] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr 
r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[34]\n\t" + "ldr r10, [%[a], #136]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #272]\n\t" + "# A[6] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[7] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[8] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], 
#84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[37]\n\t" + "ldr r10, 
[%[a], #148]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #276]\n\t" + "# A[7] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[8] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[9] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + 
"ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[45]\n\t" + "ldr 
r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[35]\n\t" + "ldr r10, [%[a], #140]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #280]\n\t" + "# A[8] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #32]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[9] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[10] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[53]\n\t" + "ldr r10, [%[a], 
#212]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * 
A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #284]\n\t" + "# A[9] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #36]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[10] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[11] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[60]\n\t" + 
"ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + 
"# A[23] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[36]\n\t" + "ldr r10, [%[a], #144]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #288]\n\t" + "# A[10] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #40]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[11] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[12] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * 
A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[28] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #292]\n\t" + "# A[11] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #44]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, 
#0\n\t" + "# A[12] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[13] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[37]\n\t" + "ldr r10, [%[a], #148]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #296]\n\t" + "# A[12] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #48]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[13] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[14] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" 
+ "adc r7, r7, r14\n\t" + "# A[18] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #300]\n\t" + "# A[13] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #52]\n\t" + "umull r5, r6, r10, 
r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[14] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[15] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], 
#140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[38]\n\t" + "ldr r10, [%[a], #152]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #304]\n\t" + "# A[14] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #56]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[15] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[16] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #124]\n\t" 
+ "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[39]\n\t" + "ldr r10, [%[a], #156]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #308]\n\t" + "# A[15] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #60]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[16] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[17] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, 
[%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[39]\n\t" + 
"ldr r10, [%[a], #156]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #312]\n\t" + "# A[16] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #64]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[17] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[18] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], 
#100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[43]\n\t" + "ldr r10, 
[%[a], #172]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #316]\n\t" + "# A[17] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #68]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[18] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[19] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" 
+ "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * 
A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[40]\n\t" + "ldr r10, [%[a], #160]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #320]\n\t" + "# A[18] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #72]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[19] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[20] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + 
"ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[49]\n\t" 
+ "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #324]\n\t" + "# A[19] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #76]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[20] * A[62]\n\t" + "ldr r10, 
[%[a], #248]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[21] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[31] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[41]\n\t" + "ldr r10, [%[a], #164]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, 
r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #328]\n\t" + "# A[20] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #80]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[21] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[22] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[30] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #332]\n\t" + "# A[21] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #84]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[22] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[23] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, r14\n\t" + "# A[30] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, 
r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[42]\n\t" + "ldr r10, [%[a], #168]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #336]\n\t" + "# A[22] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #88]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[23] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[24] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, 
r7, r14\n\t" + "# A[30] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #340]\n\t" + "# A[23] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #92]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[24] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[25] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, 
r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[43]\n\t" + "ldr r10, [%[a], #172]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #344]\n\t" + "# A[24] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #96]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[25] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[26] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #348]\n\t" + "# A[25] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #100]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[26] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[27] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[44]\n\t" + "ldr r10, [%[a], #176]\n\t" + "umull r8, r9, r10, r10\n\t" + 
"adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #352]\n\t" + "# A[26] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #104]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[27] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[28] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #356]\n\t" + "# A[27] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #108]\n\t" + "umull r5, r6, 
r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[28] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[29] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[45]\n\t" + "ldr r10, [%[a], #180]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #360]\n\t" + "# A[28] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #112]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[29] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[30] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #364]\n\t" + "# A[29] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #116]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[30] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[31] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], 
#180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[46]\n\t" + "ldr r10, [%[a], #184]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #368]\n\t" + "# A[30] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #120]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[31] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[32] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" 
+ "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #372]\n\t" + "# A[31] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #124]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[32] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[33] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #172]\n\t" + 
"umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[47]\n\t" + "ldr r10, [%[a], #188]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #376]\n\t" + "# A[32] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #128]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[33] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[34] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, 
r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #380]\n\t" + "# A[33] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #132]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[34] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[35] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, 
r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[48]\n\t" + "ldr r10, [%[a], #192]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #384]\n\t" + "# A[34] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #136]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[35] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[36] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + 
"adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs 
r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #388]\n\t" + "# A[35] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #140]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[36] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[37] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[49]\n\t" + "ldr r10, [%[a], #196]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #392]\n\t" + "# A[36] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #144]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[37] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[38] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[39] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #396]\n\t" + "# A[37] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #148]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[38] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[39] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[50]\n\t" + "ldr r10, [%[a], #200]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #400]\n\t" + "# A[38] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #152]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[39] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[40] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, r14\n\t" + "# A[47] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #404]\n\t" + "# A[39] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #156]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[40] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[41] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[45] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[51]\n\t" + "ldr r10, [%[a], #204]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #408]\n\t" + "# A[40] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #160]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[41] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[42] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[60]\n\t" + "ldr 
r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #412]\n\t" + "# A[41] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #164]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[42] * A[62]\n\t" + "ldr r10, [%[a], 
#248]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[43] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[52]\n\t" + "ldr r10, [%[a], #208]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, 
r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #416]\n\t" + "# A[42] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #168]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[43] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[44] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[53]\n\t" + "ldr r10, [%[a], 
#212]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #420]\n\t" + "# A[43] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #172]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[44] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[45] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + 
"ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[53]\n\t" + "ldr r10, [%[a], #212]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #424]\n\t" + "# A[44] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #176]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[45] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[46] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #428]\n\t" + "# A[45] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #180]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[46] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[47] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, 
r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[54]\n\t" + "ldr r10, [%[a], #216]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #432]\n\t" + "# A[46] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #184]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[47] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[48] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #436]\n\t" + "# A[47] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #188]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[48] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[49] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[55]\n\t" + "ldr r10, [%[a], #220]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #440]\n\t" + "# A[48] * 
A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #192]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[49] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[50] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #444]\n\t" + "# A[49] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #196]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[50] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[51] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + 
"ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[56]\n\t" + "ldr r10, [%[a], #224]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #448]\n\t" + "# A[50] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #200]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[51] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[52] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, 
r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #452]\n\t" + "# A[51] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #204]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[52] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[53] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[57]\n\t" + "ldr r10, [%[a], #228]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, 
r7\n\t" + "str r2, [%[r], #456]\n\t" + "# A[52] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #208]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[53] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[54] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #460]\n\t" + "# A[53] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #212]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[54] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[55] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# 
A[57] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[58]\n\t" + "ldr r10, [%[a], #232]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #464]\n\t" + "# A[54] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #216]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[55] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[56] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #468]\n\t" + "# A[55] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #220]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[56] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[57] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull 
r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[59]\n\t" + "ldr r10, [%[a], #236]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #472]\n\t" + "# A[56] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #224]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r3, #0\n\t" + "mov r7, #0\n\t" + "# A[57] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[58] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r4, r4, r5\n\t" + "adcs r2, r2, r6\n\t" + "adc r3, r3, r7\n\t" + "str r4, [%[r], #476]\n\t" + "# A[57] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #228]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r4, #0\n\t" + "mov r7, #0\n\t" + "# A[58] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[59] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
r14\n\t" + "# A[60] * A[60]\n\t" + "ldr r10, [%[a], #240]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r2, [%[r], #480]\n\t" + "# A[58] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #232]\n\t" + "umull r5, r6, r10, r8\n\t" + "mov r2, #0\n\t" + "mov r7, #0\n\t" + "# A[59] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "# A[60] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, r14\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc r2, r2, r7\n\t" + "str r3, [%[r], #484]\n\t" + "# A[59] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #236]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[60] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "# A[61] * A[61]\n\t" + "ldr r10, [%[a], #244]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #488]\n\t" + "# A[60] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #240]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r14, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "# A[61] 
* A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "ldr r8, [%[a], #244]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r14\n\t" + "str r2, [%[r], #492]\n\t" + "# A[61] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #244]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r14, r14\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "# A[62] * A[62]\n\t" + "ldr r10, [%[a], #248]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, r14\n\t" + "str r3, [%[r], #496]\n\t" + "# A[62] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "ldr r8, [%[a], #248]\n\t" + "umull r8, r9, r10, r8\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r14, r14\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, r14\n\t" + "str r4, [%[r], #500]\n\t" + "# A[63] * A[63]\n\t" + "ldr r10, [%[a], #252]\n\t" + "umull r8, r9, r10, r10\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" + "str r2, [%[r], #504]\n\t" + "str r3, [%[r], #508]\n\t" + "ldr r2, [sp, #0]\n\t" + "ldr r3, [sp, #4]\n\t" + "ldr r4, [sp, #8]\n\t" + "ldr r8, [sp, #12]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" + "str r4, [%[r], #8]\n\t" + "str r8, [%[r], #12]\n\t" + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #20]\n\t" + "ldr r4, [sp, #24]\n\t" + "ldr r8, [sp, #28]\n\t" + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "str r4, [%[r], #24]\n\t" + "str r8, [%[r], #28]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp, #44]\n\t" + "str r2, [%[r], #32]\n\t" + "str r3, [%[r], #36]\n\t" + "str r4, [%[r], #40]\n\t" + "str r8, [%[r], #44]\n\t" + "ldr r2, [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r8, [sp, #60]\n\t" + "str r2, [%[r], #48]\n\t" + "str r3, [%[r], #52]\n\t" + 
"str r4, [%[r], #56]\n\t" + "str r8, [%[r], #60]\n\t" + "ldr r2, [sp, #64]\n\t" + "ldr r3, [sp, #68]\n\t" + "ldr r4, [sp, #72]\n\t" + "ldr r8, [sp, #76]\n\t" + "str r2, [%[r], #64]\n\t" + "str r3, [%[r], #68]\n\t" + "str r4, [%[r], #72]\n\t" + "str r8, [%[r], #76]\n\t" + "ldr r2, [sp, #80]\n\t" + "ldr r3, [sp, #84]\n\t" + "ldr r4, [sp, #88]\n\t" + "ldr r8, [sp, #92]\n\t" + "str r2, [%[r], #80]\n\t" + "str r3, [%[r], #84]\n\t" + "str r4, [%[r], #88]\n\t" + "str r8, [%[r], #92]\n\t" + "ldr r2, [sp, #96]\n\t" + "ldr r3, [sp, #100]\n\t" + "ldr r4, [sp, #104]\n\t" + "ldr r8, [sp, #108]\n\t" + "str r2, [%[r], #96]\n\t" + "str r3, [%[r], #100]\n\t" + "str r4, [%[r], #104]\n\t" + "str r8, [%[r], #108]\n\t" + "ldr r2, [sp, #112]\n\t" + "ldr r3, [sp, #116]\n\t" + "ldr r4, [sp, #120]\n\t" + "ldr r8, [sp, #124]\n\t" + "str r2, [%[r], #112]\n\t" + "str r3, [%[r], #116]\n\t" + "str r4, [%[r], #120]\n\t" + "str r8, [%[r], #124]\n\t" + "ldr r2, [sp, #128]\n\t" + "ldr r3, [sp, #132]\n\t" + "ldr r4, [sp, #136]\n\t" + "ldr r8, [sp, #140]\n\t" + "str r2, [%[r], #128]\n\t" + "str r3, [%[r], #132]\n\t" + "str r4, [%[r], #136]\n\t" + "str r8, [%[r], #140]\n\t" + "ldr r2, [sp, #144]\n\t" + "ldr r3, [sp, #148]\n\t" + "ldr r4, [sp, #152]\n\t" + "ldr r8, [sp, #156]\n\t" + "str r2, [%[r], #144]\n\t" + "str r3, [%[r], #148]\n\t" + "str r4, [%[r], #152]\n\t" + "str r8, [%[r], #156]\n\t" + "ldr r2, [sp, #160]\n\t" + "ldr r3, [sp, #164]\n\t" + "ldr r4, [sp, #168]\n\t" + "ldr r8, [sp, #172]\n\t" + "str r2, [%[r], #160]\n\t" + "str r3, [%[r], #164]\n\t" + "str r4, [%[r], #168]\n\t" + "str r8, [%[r], #172]\n\t" + "ldr r2, [sp, #176]\n\t" + "ldr r3, [sp, #180]\n\t" + "ldr r4, [sp, #184]\n\t" + "ldr r8, [sp, #188]\n\t" + "str r2, [%[r], #176]\n\t" + "str r3, [%[r], #180]\n\t" + "str r4, [%[r], #184]\n\t" + "str r8, [%[r], #188]\n\t" + "ldr r2, [sp, #192]\n\t" + "ldr r3, [sp, #196]\n\t" + "ldr r4, [sp, #200]\n\t" + "ldr r8, [sp, #204]\n\t" + "str r2, [%[r], #192]\n\t" + "str r3, [%[r], #196]\n\t" + 
"str r4, [%[r], #200]\n\t" + "str r8, [%[r], #204]\n\t" + "ldr r2, [sp, #208]\n\t" + "ldr r3, [sp, #212]\n\t" + "ldr r4, [sp, #216]\n\t" + "ldr r8, [sp, #220]\n\t" + "str r2, [%[r], #208]\n\t" + "str r3, [%[r], #212]\n\t" + "str r4, [%[r], #216]\n\t" + "str r8, [%[r], #220]\n\t" + "ldr r2, [sp, #224]\n\t" + "ldr r3, [sp, #228]\n\t" + "ldr r4, [sp, #232]\n\t" + "ldr r8, [sp, #236]\n\t" + "str r2, [%[r], #224]\n\t" + "str r3, [%[r], #228]\n\t" + "str r4, [%[r], #232]\n\t" + "str r8, [%[r], #236]\n\t" + "ldr r2, [sp, #240]\n\t" + "ldr r3, [sp, #244]\n\t" + "ldr r4, [sp, #248]\n\t" + "ldr r8, [sp, #252]\n\t" + "str r2, [%[r], #240]\n\t" + "str r3, [%[r], #244]\n\t" + "str r4, [%[r], #248]\n\t" + "str r8, [%[r], #252]\n\t" + "add sp, sp, #256\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14" + ); +} + +/* Square a and put result in r. (r = a * a) + * Uses three 64-digit squarings: the low half, the high half and the sum of the halves; the sum's square minus the other two squares is the doubled cross product that is added in at digit 64. NOTE(review): what sp_2048_add_64, sp_2048_mask_64 and sp_2048_sqr_64 compute is inferred from their names and their use here; confirm against their definitions. + * r Result buffer; the function writes digits r[0] through r[255]. + * a Input; read as two halves starting at a[0] and a[64]. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); /* a1 = sum of the two halves; u presumably receives the carry out */ + sp_2048_sqr_64(z1, a1); /* z1 = square of the half sum */ + sp_2048_sqr_64(z2, &a[64]); /* z2 = square of the high half */ + sp_2048_sqr_64(z0, a); /* z0 aliases r, so the square of the low half lands at the start of r */ + sp_2048_mask_64(r + 128, a1, 0 - u); /* 0 - u is all ones when u is 1 and zero when u is 0 */ + u += sp_2048_add_64(r + 128, r + 128, r + 128); /* adds the block at r[128] to itself in place */ + u += sp_4096_sub_in_place_128(z1, z2); /* z1 -= z2 */ + u += sp_4096_sub_in_place_128(z1, z0); /* z1 -= z0, i.e. the low-half square held in r */ + u += sp_4096_add_128(r + 64, r + 64, z1); /* add z1 into the result starting at digit 64 */ + r[192] = u; /* running total of the helper return values collected above */ + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); /* zero r[193] through r[255] */ + (void)sp_4096_add_128(r + 128, r + 128, z2); /* add z2 starting at digit 128; return value discarded */ +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ /* Adds 512 bytes of a and b word by word into r, four words per loop pass, and returns the carry out of the final word (0 or 1). */ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add r12, %[a], #512\n\t" /* r12 = a + 512 bytes, the loop end marker */ + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" /* rebuild the carry flag from c: C is set only when c != 0 */ + "ldr r4, [%[a]], #4\n\t" + "ldr r5, [%[a]], #4\n\t" + "ldr r6, [%[a]], #4\n\t" + "ldr r7, [%[a]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "ldr r14, [%[b]], #4\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r14\n\t" + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]], #4\n\t" + "str r6, [%[r]], #4\n\t" + "str r7, [%[r]], #4\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" /* c = carry out of this group of four words (0 or 1) */ + "cmp %[a], r12\n\t" /* a was post-incremented by the loads; stop once it reaches r12 */ + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. + * + * Returns 0 when the subtraction did not borrow out of the last word and + * all ones (-1) when it did. + */ +static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r14, #0\n\t" /* r14 stays zero for the whole loop */ + "add r12, %[a], #512\n\t" /* r12 = a + 512 bytes, the loop end marker */ + "\n1:\n\t" + "subs %[c], r14, %[c]\n\t" /* 0 - c: the C flag ends up clear (borrow pending) only when c != 0 */ + "ldr r3, [%[a]]\n\t" /* a is read with plain offsets here and advanced by the stores below */ + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b]], #4\n\t" + "ldr r8, [%[b]], #4\n\t" + "ldr r9, [%[b]], #4\n\t" + "ldr r10, [%[b]], #4\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "str r3, [%[a]], #4\n\t" + "str r4, [%[a]], #4\n\t" + "str r5, [%[a]], #4\n\t" + "str r6, [%[a]], #4\n\t" + "sbc %[c], r14, r14\n\t" /* c = 0 without borrow, all ones with borrow */ + "cmp %[a], r12\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. 
(r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ /* Builds the product one result word at a time in a 1024-byte stack buffer and copies that buffer to r at the end (presumably so that r may overlap an input - confirm with callers). */ + __asm__ __volatile__ ( + "sub sp, sp, #1024\n\t" /* reserve the 1024-byte scratch buffer */ + "mov r5, #0\n\t" /* r5 = byte offset of the result word being built */ + "mov r6, #0\n\t" /* r6, r7, r8 = three-word running sum, lowest word in r6 */ + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "subs r3, r5, #508\n\t" /* r3 = byte offset into a: r5 - 508 ... */ + "it cc\n\t" + "movcc r3, #0\n\t" /* ... replaced by 0 when r5 < 508 */ + "sub r4, r5, r3\n\t" /* r4 = byte offset into b, so r3 + r4 == r5 */ + "\n2:\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r12, [%[b], r4]\n\t" + "umull r9, r10, r14, r12\n\t" /* 64-bit partial product in r10:r9 */ + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #512\n\t" /* ran off the end of a */ + "beq 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" /* finished word goes to the scratch buffer */ + "mov r6, r7\n\t" /* running sum moves down by one word */ + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #1016\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" /* r5 == 1020 here: last word of the buffer */ + "\n4:\n\t" + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" /* the copy loop releases the buffer 16 bytes per pass */ + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" /* r5 starts at 1020, giving 64 passes, so sp is back at its entry value */ + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ /* Builds the square one result word at a time in a 1024-byte stack buffer, adding each cross product twice and each square term once, then copies the buffer to r. */ + __asm__ __volatile__ ( + "sub sp, sp, #1024\n\t" /* reserve the 1024-byte scratch buffer */ + "mov r12, #0\n\t" /* r12 stays zero and serves as the 0 operand below */ + "mov r6, #0\n\t" /* r6, r7, r8 = three-word running sum, lowest word in r6 */ + "mov r7, #0\n\t" + "mov r8, #0\n\t" + "mov r5, #0\n\t" /* r5 = byte offset of the result word being built */ + "\n1:\n\t" + "subs r3, r5, #508\n\t" /* r3 = lower byte offset into a: r5 - 508 ... */ + "it cc\n\t" + "movcc r3, r12\n\t" /* ... replaced by 0 when r5 < 508 */ + "sub r4, r5, r3\n\t" /* r4 = mirrored byte offset, so r3 + r4 == r5 */ + "\n2:\n\t" + "cmp r4, r3\n\t" /* equal offsets: square term, handled at the first label 4 */ + "beq 4f\n\t" + "ldr r14, [%[a], r3]\n\t" + "ldr r9, [%[a], r4]\n\t" + "umull r9, r10, r14, r9\n\t" + "adds r6, r6, r9\n\t" /* cross product is added to the running sum ... */ + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "adds r6, r6, r9\n\t" /* ... and added a second time */ + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "ldr r14, [%[a], r3]\n\t" + "umull r9, r10, r14, r14\n\t" /* square term is added once */ + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, r12\n\t" + "\n5:\n\t" + "add r3, r3, #4\n\t" + "sub r4, r4, #4\n\t" + "cmp r3, #512\n\t" /* ran off the end of a */ + "beq 3f\n\t" + "cmp r3, r4\n\t" /* offsets have crossed: every pair for this word is done */ + "bgt 3f\n\t" + "cmp r3, r5\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "str r6, [sp, r5]\n\t" /* finished word goes to the scratch buffer */ + "mov r6, r7\n\t" /* running sum moves down by one word */ + "mov r7, r8\n\t" + "mov r8, #0\n\t" + "add r5, r5, #4\n\t" + "cmp r5, #1016\n\t" + "ble 1b\n\t" + "str r6, [sp, r5]\n\t" /* r5 == 1020 here: last word of the buffer */ + "\n4:\n\t" /* second definition of local label 4: the bgt 4b below binds here, the beq 4f above binds to the earlier one */ + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r3, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "add sp, sp, #16\n\t" /* the copy loop releases the buffer 16 bytes per pass */ + "add %[r], %[r], #16\n\t" + "subs r5, r5, #16\n\t" + "bgt 4b\n\t" /* r5 starts at 1020, giving 64 passes, so sp is back at its entry value */ + : [r] "+r" (r) + : [a] "r" (a) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Calculate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. 
+ */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ /* Only a[0] is read. Starts from an inverse that is correct to 4 bits and refines it three times; each x *= 2 - b * x step doubles the number of correct low bits. NOTE(review): an inverse modulo a power of two exists only for odd a[0] - confirm callers guarantee this. */ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b, i.e. the negation of the inverse x computed above */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #512\n\t" + "blt 1b\n\t" + "str r3, [%[r], #512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + 
"ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], 
#56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov 
r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov 
r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov 
r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov 
r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, [%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #252]\n\t" + "# A[64] * B\n\t" + "ldr r8, [%[a], #256]\n\t" + "mov 
r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[65] * B\n\t" + "ldr r8, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[66] * B\n\t" + "ldr r8, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[67] * B\n\t" + "ldr r8, [%[a], #268]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[68] * B\n\t" + "ldr r8, [%[a], #272]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[69] * B\n\t" + "ldr r8, [%[a], #276]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[70] * B\n\t" + "ldr r8, [%[a], #280]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[71] * B\n\t" + "ldr r8, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[72] * B\n\t" + "ldr r8, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #288]\n\t" + "# A[73] * B\n\t" + "ldr r8, [%[a], #292]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #292]\n\t" + "# A[74] * B\n\t" + "ldr r8, [%[a], #296]\n\t" + "mov 
r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[75] * B\n\t" + "ldr r8, [%[a], #300]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #300]\n\t" + "# A[76] * B\n\t" + "ldr r8, [%[a], #304]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[77] * B\n\t" + "ldr r8, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[78] * B\n\t" + "ldr r8, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[79] * B\n\t" + "ldr r8, [%[a], #316]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[80] * B\n\t" + "ldr r8, [%[a], #320]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[81] * B\n\t" + "ldr r8, [%[a], #324]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[82] * B\n\t" + "ldr r8, [%[a], #328]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[83] * B\n\t" + "ldr r8, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[84] * B\n\t" + "ldr r8, [%[a], #336]\n\t" + "mov 
r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[85] * B\n\t" + "ldr r8, [%[a], #340]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[86] * B\n\t" + "ldr r8, [%[a], #344]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #344]\n\t" + "# A[87] * B\n\t" + "ldr r8, [%[a], #348]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[88] * B\n\t" + "ldr r8, [%[a], #352]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[89] * B\n\t" + "ldr r8, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[90] * B\n\t" + "ldr r8, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[91] * B\n\t" + "ldr r8, [%[a], #364]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[92] * B\n\t" + "ldr r8, [%[a], #368]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[93] * B\n\t" + "ldr r8, [%[a], #372]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[94] * B\n\t" + "ldr r8, [%[a], #376]\n\t" + "mov 
r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[95] * B\n\t" + "ldr r8, [%[a], #380]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #380]\n\t" + "# A[96] * B\n\t" + "ldr r8, [%[a], #384]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #384]\n\t" + "# A[97] * B\n\t" + "ldr r8, [%[a], #388]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #388]\n\t" + "# A[98] * B\n\t" + "ldr r8, [%[a], #392]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #392]\n\t" + "# A[99] * B\n\t" + "ldr r8, [%[a], #396]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #396]\n\t" + "# A[100] * B\n\t" + "ldr r8, [%[a], #400]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #400]\n\t" + "# A[101] * B\n\t" + "ldr r8, [%[a], #404]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #404]\n\t" + "# A[102] * B\n\t" + "ldr r8, [%[a], #408]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #408]\n\t" + "# A[103] * B\n\t" + "ldr r8, [%[a], #412]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #412]\n\t" + "# A[104] * B\n\t" + "ldr r8, [%[a], #416]\n\t" + 
"mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #416]\n\t" + "# A[105] * B\n\t" + "ldr r8, [%[a], #420]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #420]\n\t" + "# A[106] * B\n\t" + "ldr r8, [%[a], #424]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #424]\n\t" + "# A[107] * B\n\t" + "ldr r8, [%[a], #428]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #428]\n\t" + "# A[108] * B\n\t" + "ldr r8, [%[a], #432]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #432]\n\t" + "# A[109] * B\n\t" + "ldr r8, [%[a], #436]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #436]\n\t" + "# A[110] * B\n\t" + "ldr r8, [%[a], #440]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #440]\n\t" + "# A[111] * B\n\t" + "ldr r8, [%[a], #444]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #444]\n\t" + "# A[112] * B\n\t" + "ldr r8, [%[a], #448]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #448]\n\t" + "# A[113] * B\n\t" + "ldr r8, [%[a], #452]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #452]\n\t" + "# A[114] * B\n\t" + "ldr r8, [%[a], 
#456]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #456]\n\t" + "# A[115] * B\n\t" + "ldr r8, [%[a], #460]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #460]\n\t" + "# A[116] * B\n\t" + "ldr r8, [%[a], #464]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #464]\n\t" + "# A[117] * B\n\t" + "ldr r8, [%[a], #468]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #468]\n\t" + "# A[118] * B\n\t" + "ldr r8, [%[a], #472]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #472]\n\t" + "# A[119] * B\n\t" + "ldr r8, [%[a], #476]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #476]\n\t" + "# A[120] * B\n\t" + "ldr r8, [%[a], #480]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #480]\n\t" + "# A[121] * B\n\t" + "ldr r8, [%[a], #484]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #484]\n\t" + "# A[122] * B\n\t" + "ldr r8, [%[a], #488]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #488]\n\t" + "# A[123] * B\n\t" + "ldr r8, [%[a], #492]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #492]\n\t" + "# A[124] * B\n\t" + "ldr 
#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
/* Compute r = 2^n mod m, where n is the number of bits being reduced by.
 *
 * The modulus must be a full 4096 bits, which is why one subtraction from a
 * zeroed number is enough.
 *
 * r  A single precision number that receives the result.
 * m  A single precision number: the modulus.
 */
static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
{
    /* Start from zero across all 128 digits of r. */
    XMEMSET(r, 0, 128 * sizeof(*r));

    /* r = 0 - m. The value returned by the subtraction is deliberately not
     * used here (presumably a borrow; the helper is defined elsewhere in
     * this file). */
    (void)sp_4096_sub_in_place_128(r, m);
}

#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+ */ +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "subs %[c], r9, %[c]\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #512\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "subs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" 
+ "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + 
"sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], 
#156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], 
#216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r6, [%[a], #260]\n\t" + "ldr r5, [%[b], #256]\n\t" + "ldr r7, [%[b], #260]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #256]\n\t" + "str r6, [%[r], #260]\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r6, [%[a], #268]\n\t" + "ldr r5, [%[b], #264]\n\t" + "ldr r7, [%[b], #268]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #264]\n\t" + "str r6, [%[r], #268]\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r6, [%[a], #276]\n\t" + "ldr r5, [%[b], #272]\n\t" + "ldr r7, [%[b], #276]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + 
"sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #272]\n\t" + "str r6, [%[r], #276]\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r6, [%[a], #284]\n\t" + "ldr r5, [%[b], #280]\n\t" + "ldr r7, [%[b], #284]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #280]\n\t" + "str r6, [%[r], #284]\n\t" + "ldr r4, [%[a], #288]\n\t" + "ldr r6, [%[a], #292]\n\t" + "ldr r5, [%[b], #288]\n\t" + "ldr r7, [%[b], #292]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #288]\n\t" + "str r6, [%[r], #292]\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r6, [%[a], #300]\n\t" + "ldr r5, [%[b], #296]\n\t" + "ldr r7, [%[b], #300]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #296]\n\t" + "str r6, [%[r], #300]\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r6, [%[a], #308]\n\t" + "ldr r5, [%[b], #304]\n\t" + "ldr r7, [%[b], #308]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #304]\n\t" + "str r6, [%[r], #308]\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r6, [%[a], #316]\n\t" + "ldr r5, [%[b], #312]\n\t" + "ldr r7, [%[b], #316]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #312]\n\t" + "str r6, [%[r], #316]\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r6, [%[a], #324]\n\t" + "ldr r5, [%[b], #320]\n\t" + "ldr r7, [%[b], #324]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #320]\n\t" + "str r6, [%[r], #324]\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r6, [%[a], #332]\n\t" + "ldr r5, [%[b], #328]\n\t" + "ldr r7, [%[b], #332]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #328]\n\t" + "str r6, [%[r], 
#332]\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r6, [%[a], #340]\n\t" + "ldr r5, [%[b], #336]\n\t" + "ldr r7, [%[b], #340]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #336]\n\t" + "str r6, [%[r], #340]\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r6, [%[a], #348]\n\t" + "ldr r5, [%[b], #344]\n\t" + "ldr r7, [%[b], #348]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #344]\n\t" + "str r6, [%[r], #348]\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r6, [%[a], #356]\n\t" + "ldr r5, [%[b], #352]\n\t" + "ldr r7, [%[b], #356]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #352]\n\t" + "str r6, [%[r], #356]\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r6, [%[a], #364]\n\t" + "ldr r5, [%[b], #360]\n\t" + "ldr r7, [%[b], #364]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #360]\n\t" + "str r6, [%[r], #364]\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r6, [%[a], #372]\n\t" + "ldr r5, [%[b], #368]\n\t" + "ldr r7, [%[b], #372]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #368]\n\t" + "str r6, [%[r], #372]\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r6, [%[a], #380]\n\t" + "ldr r5, [%[b], #376]\n\t" + "ldr r7, [%[b], #380]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #376]\n\t" + "str r6, [%[r], #380]\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r6, [%[a], #388]\n\t" + "ldr r5, [%[b], #384]\n\t" + "ldr r7, [%[b], #388]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #384]\n\t" + "str r6, [%[r], #388]\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r6, [%[a], #396]\n\t" + "ldr r5, [%[b], 
#392]\n\t" + "ldr r7, [%[b], #396]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #392]\n\t" + "str r6, [%[r], #396]\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r6, [%[a], #404]\n\t" + "ldr r5, [%[b], #400]\n\t" + "ldr r7, [%[b], #404]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #400]\n\t" + "str r6, [%[r], #404]\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r6, [%[a], #412]\n\t" + "ldr r5, [%[b], #408]\n\t" + "ldr r7, [%[b], #412]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #408]\n\t" + "str r6, [%[r], #412]\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r6, [%[a], #420]\n\t" + "ldr r5, [%[b], #416]\n\t" + "ldr r7, [%[b], #420]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #416]\n\t" + "str r6, [%[r], #420]\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r6, [%[a], #428]\n\t" + "ldr r5, [%[b], #424]\n\t" + "ldr r7, [%[b], #428]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #424]\n\t" + "str r6, [%[r], #428]\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r6, [%[a], #436]\n\t" + "ldr r5, [%[b], #432]\n\t" + "ldr r7, [%[b], #436]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #432]\n\t" + "str r6, [%[r], #436]\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r6, [%[a], #444]\n\t" + "ldr r5, [%[b], #440]\n\t" + "ldr r7, [%[b], #444]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #440]\n\t" + "str r6, [%[r], #444]\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r6, [%[a], #452]\n\t" + "ldr r5, [%[b], #448]\n\t" + "ldr r7, [%[b], #452]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + 
"sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #448]\n\t" + "str r6, [%[r], #452]\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r6, [%[a], #460]\n\t" + "ldr r5, [%[b], #456]\n\t" + "ldr r7, [%[b], #460]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #456]\n\t" + "str r6, [%[r], #460]\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r6, [%[a], #468]\n\t" + "ldr r5, [%[b], #464]\n\t" + "ldr r7, [%[b], #468]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #464]\n\t" + "str r6, [%[r], #468]\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r6, [%[a], #476]\n\t" + "ldr r5, [%[b], #472]\n\t" + "ldr r7, [%[b], #476]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #472]\n\t" + "str r6, [%[r], #476]\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r6, [%[a], #484]\n\t" + "ldr r5, [%[b], #480]\n\t" + "ldr r7, [%[b], #484]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #480]\n\t" + "str r6, [%[r], #484]\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r6, [%[a], #492]\n\t" + "ldr r5, [%[b], #488]\n\t" + "ldr r7, [%[b], #492]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #488]\n\t" + "str r6, [%[r], #492]\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r6, [%[a], #500]\n\t" + "ldr r5, [%[b], #496]\n\t" + "ldr r7, [%[b], #500]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #496]\n\t" + "str r6, [%[r], #500]\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r6, [%[a], #508]\n\t" + "ldr r5, [%[b], #504]\n\t" + "ldr r7, [%[b], #508]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbcs r6, r6, r7\n\t" + "str r4, [%[r], #504]\n\t" + "str r6, [%[r], 
#508]\n\t" + "sbc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "# i = 0\n\t" + "mov r12, #0\n\t" + "ldr r10, [%[a], #0]\n\t" + "ldr r14, [%[a], #4]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul r8, %[mp], r10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr r7, [%[m], #0]\n\t" + "ldr r9, [%[a], #0]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr r7, [%[m], #4]\n\t" + "ldr r9, [%[a], #4]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r10, r14, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r10, r10, r5\n\t" + "adc r4, r4, #0\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr r7, [%[m], #8]\n\t" + "ldr r14, [%[a], #8]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r14, r14, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r14, r14, r4\n\t" + "adc r5, r5, #0\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr r7, [%[m], #12]\n\t" + "ldr r9, [%[a], #12]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr r7, [%[m], #16]\n\t" + "ldr r9, [%[a], #16]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr r7, [%[m], #20]\n\t" + "ldr r9, [%[a], #20]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds 
r9, r9, r5\n\t" + "str r9, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr r7, [%[m], #24]\n\t" + "ldr r9, [%[a], #24]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr r7, [%[m], #28]\n\t" + "ldr r9, [%[a], #28]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr r7, [%[m], #32]\n\t" + "ldr r9, [%[a], #32]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr r7, [%[m], #36]\n\t" + "ldr r9, [%[a], #36]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr r7, [%[m], #40]\n\t" + "ldr r9, [%[a], #40]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr r7, [%[m], #44]\n\t" + "ldr r9, [%[a], #44]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr r7, [%[m], #48]\n\t" + "ldr r9, [%[a], #48]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr r7, [%[m], #52]\n\t" + "ldr r9, [%[a], #52]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+14] += m[14] * 
mu\n\t" + "ldr r7, [%[m], #56]\n\t" + "ldr r9, [%[a], #56]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr r7, [%[m], #60]\n\t" + "ldr r9, [%[a], #60]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr r7, [%[m], #64]\n\t" + "ldr r9, [%[a], #64]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr r7, [%[m], #68]\n\t" + "ldr r9, [%[a], #68]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #68]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr r7, [%[m], #72]\n\t" + "ldr r9, [%[a], #72]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr r7, [%[m], #76]\n\t" + "ldr r9, [%[a], #76]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #76]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr r7, [%[m], #80]\n\t" + "ldr r9, [%[a], #80]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr r7, [%[m], #84]\n\t" + "ldr r9, [%[a], #84]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #84]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr r7, [%[m], #88]\n\t" + "ldr r9, [%[a], #88]\n\t" + "umull r6, r7, r8, 
r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr r7, [%[m], #92]\n\t" + "ldr r9, [%[a], #92]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #92]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr r7, [%[m], #96]\n\t" + "ldr r9, [%[a], #96]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr r7, [%[m], #100]\n\t" + "ldr r9, [%[a], #100]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #100]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr r7, [%[m], #104]\n\t" + "ldr r9, [%[a], #104]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr r7, [%[m], #108]\n\t" + "ldr r9, [%[a], #108]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #108]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr r7, [%[m], #112]\n\t" + "ldr r9, [%[a], #112]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr r7, [%[m], #116]\n\t" + "ldr r9, [%[a], #116]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #116]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr r7, [%[m], #120]\n\t" + "ldr r9, [%[a], #120]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, 
r4\n\t" + "str r9, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr r7, [%[m], #124]\n\t" + "ldr r9, [%[a], #124]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #124]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr r7, [%[m], #128]\n\t" + "ldr r9, [%[a], #128]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr r7, [%[m], #132]\n\t" + "ldr r9, [%[a], #132]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #132]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr r7, [%[m], #136]\n\t" + "ldr r9, [%[a], #136]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr r7, [%[m], #140]\n\t" + "ldr r9, [%[a], #140]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #140]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr r7, [%[m], #144]\n\t" + "ldr r9, [%[a], #144]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr r7, [%[m], #148]\n\t" + "ldr r9, [%[a], #148]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #148]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr r7, [%[m], #152]\n\t" + "ldr r9, [%[a], #152]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + 
"# a[i+39] += m[39] * mu\n\t" + "ldr r7, [%[m], #156]\n\t" + "ldr r9, [%[a], #156]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #156]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr r7, [%[m], #160]\n\t" + "ldr r9, [%[a], #160]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr r7, [%[m], #164]\n\t" + "ldr r9, [%[a], #164]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #164]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr r7, [%[m], #168]\n\t" + "ldr r9, [%[a], #168]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr r7, [%[m], #172]\n\t" + "ldr r9, [%[a], #172]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #172]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr r7, [%[m], #176]\n\t" + "ldr r9, [%[a], #176]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr r7, [%[m], #180]\n\t" + "ldr r9, [%[a], #180]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #180]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr r7, [%[m], #184]\n\t" + "ldr r9, [%[a], #184]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr r7, [%[m], #188]\n\t" + 
"ldr r9, [%[a], #188]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #188]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr r7, [%[m], #192]\n\t" + "ldr r9, [%[a], #192]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr r7, [%[m], #196]\n\t" + "ldr r9, [%[a], #196]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #196]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr r7, [%[m], #200]\n\t" + "ldr r9, [%[a], #200]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr r7, [%[m], #204]\n\t" + "ldr r9, [%[a], #204]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #204]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr r7, [%[m], #208]\n\t" + "ldr r9, [%[a], #208]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr r7, [%[m], #212]\n\t" + "ldr r9, [%[a], #212]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #212]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr r7, [%[m], #216]\n\t" + "ldr r9, [%[a], #216]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr r7, [%[m], #220]\n\t" + "ldr r9, [%[a], #220]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds 
r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #220]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr r7, [%[m], #224]\n\t" + "ldr r9, [%[a], #224]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr r7, [%[m], #228]\n\t" + "ldr r9, [%[a], #228]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #228]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr r7, [%[m], #232]\n\t" + "ldr r9, [%[a], #232]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr r7, [%[m], #236]\n\t" + "ldr r9, [%[a], #236]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #236]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr r7, [%[m], #240]\n\t" + "ldr r9, [%[a], #240]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr r7, [%[m], #244]\n\t" + "ldr r9, [%[a], #244]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #244]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr r7, [%[m], #248]\n\t" + "ldr r9, [%[a], #248]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr r7, [%[m], #252]\n\t" + "ldr r9, [%[a], #252]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" 
+ "str r9, [%[a], #252]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+64] += m[64] * mu\n\t" + "ldr r7, [%[m], #256]\n\t" + "ldr r9, [%[a], #256]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+65] += m[65] * mu\n\t" + "ldr r7, [%[m], #260]\n\t" + "ldr r9, [%[a], #260]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #260]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+66] += m[66] * mu\n\t" + "ldr r7, [%[m], #264]\n\t" + "ldr r9, [%[a], #264]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+67] += m[67] * mu\n\t" + "ldr r7, [%[m], #268]\n\t" + "ldr r9, [%[a], #268]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #268]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+68] += m[68] * mu\n\t" + "ldr r7, [%[m], #272]\n\t" + "ldr r9, [%[a], #272]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+69] += m[69] * mu\n\t" + "ldr r7, [%[m], #276]\n\t" + "ldr r9, [%[a], #276]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #276]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+70] += m[70] * mu\n\t" + "ldr r7, [%[m], #280]\n\t" + "ldr r9, [%[a], #280]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+71] += m[71] * mu\n\t" + "ldr r7, [%[m], #284]\n\t" + "ldr r9, [%[a], #284]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #284]\n\t" + "adc r4, r4, #0\n\t" + "# 
a[i+72] += m[72] * mu\n\t" + "ldr r7, [%[m], #288]\n\t" + "ldr r9, [%[a], #288]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+73] += m[73] * mu\n\t" + "ldr r7, [%[m], #292]\n\t" + "ldr r9, [%[a], #292]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #292]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+74] += m[74] * mu\n\t" + "ldr r7, [%[m], #296]\n\t" + "ldr r9, [%[a], #296]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+75] += m[75] * mu\n\t" + "ldr r7, [%[m], #300]\n\t" + "ldr r9, [%[a], #300]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #300]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+76] += m[76] * mu\n\t" + "ldr r7, [%[m], #304]\n\t" + "ldr r9, [%[a], #304]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+77] += m[77] * mu\n\t" + "ldr r7, [%[m], #308]\n\t" + "ldr r9, [%[a], #308]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #308]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+78] += m[78] * mu\n\t" + "ldr r7, [%[m], #312]\n\t" + "ldr r9, [%[a], #312]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+79] += m[79] * mu\n\t" + "ldr r7, [%[m], #316]\n\t" + "ldr r9, [%[a], #316]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #316]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+80] += m[80] * mu\n\t" + "ldr r7, [%[m], #320]\n\t" + "ldr 
r9, [%[a], #320]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+81] += m[81] * mu\n\t" + "ldr r7, [%[m], #324]\n\t" + "ldr r9, [%[a], #324]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #324]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+82] += m[82] * mu\n\t" + "ldr r7, [%[m], #328]\n\t" + "ldr r9, [%[a], #328]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+83] += m[83] * mu\n\t" + "ldr r7, [%[m], #332]\n\t" + "ldr r9, [%[a], #332]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #332]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+84] += m[84] * mu\n\t" + "ldr r7, [%[m], #336]\n\t" + "ldr r9, [%[a], #336]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+85] += m[85] * mu\n\t" + "ldr r7, [%[m], #340]\n\t" + "ldr r9, [%[a], #340]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #340]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+86] += m[86] * mu\n\t" + "ldr r7, [%[m], #344]\n\t" + "ldr r9, [%[a], #344]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+87] += m[87] * mu\n\t" + "ldr r7, [%[m], #348]\n\t" + "ldr r9, [%[a], #348]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #348]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+88] += m[88] * mu\n\t" + "ldr r7, [%[m], #352]\n\t" + "ldr r9, [%[a], #352]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, 
r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+89] += m[89] * mu\n\t" + "ldr r7, [%[m], #356]\n\t" + "ldr r9, [%[a], #356]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #356]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+90] += m[90] * mu\n\t" + "ldr r7, [%[m], #360]\n\t" + "ldr r9, [%[a], #360]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+91] += m[91] * mu\n\t" + "ldr r7, [%[m], #364]\n\t" + "ldr r9, [%[a], #364]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #364]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+92] += m[92] * mu\n\t" + "ldr r7, [%[m], #368]\n\t" + "ldr r9, [%[a], #368]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+93] += m[93] * mu\n\t" + "ldr r7, [%[m], #372]\n\t" + "ldr r9, [%[a], #372]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #372]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+94] += m[94] * mu\n\t" + "ldr r7, [%[m], #376]\n\t" + "ldr r9, [%[a], #376]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+95] += m[95] * mu\n\t" + "ldr r7, [%[m], #380]\n\t" + "ldr r9, [%[a], #380]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #380]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+96] += m[96] * mu\n\t" + "ldr r7, [%[m], #384]\n\t" + "ldr r9, [%[a], #384]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + 
"str r9, [%[a], #384]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+97] += m[97] * mu\n\t" + "ldr r7, [%[m], #388]\n\t" + "ldr r9, [%[a], #388]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #388]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+98] += m[98] * mu\n\t" + "ldr r7, [%[m], #392]\n\t" + "ldr r9, [%[a], #392]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #392]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+99] += m[99] * mu\n\t" + "ldr r7, [%[m], #396]\n\t" + "ldr r9, [%[a], #396]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #396]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+100] += m[100] * mu\n\t" + "ldr r7, [%[m], #400]\n\t" + "ldr r9, [%[a], #400]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #400]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+101] += m[101] * mu\n\t" + "ldr r7, [%[m], #404]\n\t" + "ldr r9, [%[a], #404]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #404]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+102] += m[102] * mu\n\t" + "ldr r7, [%[m], #408]\n\t" + "ldr r9, [%[a], #408]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #408]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+103] += m[103] * mu\n\t" + "ldr r7, [%[m], #412]\n\t" + "ldr r9, [%[a], #412]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #412]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+104] += m[104] * mu\n\t" + "ldr r7, [%[m], #416]\n\t" + "ldr r9, [%[a], #416]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #416]\n\t" + "adc r5, r5, #0\n\t" + 
"# a[i+105] += m[105] * mu\n\t" + "ldr r7, [%[m], #420]\n\t" + "ldr r9, [%[a], #420]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #420]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+106] += m[106] * mu\n\t" + "ldr r7, [%[m], #424]\n\t" + "ldr r9, [%[a], #424]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #424]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+107] += m[107] * mu\n\t" + "ldr r7, [%[m], #428]\n\t" + "ldr r9, [%[a], #428]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #428]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+108] += m[108] * mu\n\t" + "ldr r7, [%[m], #432]\n\t" + "ldr r9, [%[a], #432]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #432]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+109] += m[109] * mu\n\t" + "ldr r7, [%[m], #436]\n\t" + "ldr r9, [%[a], #436]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #436]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+110] += m[110] * mu\n\t" + "ldr r7, [%[m], #440]\n\t" + "ldr r9, [%[a], #440]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #440]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+111] += m[111] * mu\n\t" + "ldr r7, [%[m], #444]\n\t" + "ldr r9, [%[a], #444]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #444]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+112] += m[112] * mu\n\t" + "ldr r7, [%[m], #448]\n\t" + "ldr r9, [%[a], #448]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #448]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+113] += m[113] * mu\n\t" + "ldr r7, 
[%[m], #452]\n\t" + "ldr r9, [%[a], #452]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #452]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+114] += m[114] * mu\n\t" + "ldr r7, [%[m], #456]\n\t" + "ldr r9, [%[a], #456]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #456]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+115] += m[115] * mu\n\t" + "ldr r7, [%[m], #460]\n\t" + "ldr r9, [%[a], #460]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #460]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+116] += m[116] * mu\n\t" + "ldr r7, [%[m], #464]\n\t" + "ldr r9, [%[a], #464]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #464]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+117] += m[117] * mu\n\t" + "ldr r7, [%[m], #468]\n\t" + "ldr r9, [%[a], #468]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #468]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+118] += m[118] * mu\n\t" + "ldr r7, [%[m], #472]\n\t" + "ldr r9, [%[a], #472]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #472]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+119] += m[119] * mu\n\t" + "ldr r7, [%[m], #476]\n\t" + "ldr r9, [%[a], #476]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #476]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+120] += m[120] * mu\n\t" + "ldr r7, [%[m], #480]\n\t" + "ldr r9, [%[a], #480]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #480]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+121] += m[121] * mu\n\t" + "ldr r7, [%[m], #484]\n\t" + "ldr r9, [%[a], #484]\n\t" 
+ "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #484]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+122] += m[122] * mu\n\t" + "ldr r7, [%[m], #488]\n\t" + "ldr r9, [%[a], #488]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #488]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+123] += m[123] * mu\n\t" + "ldr r7, [%[m], #492]\n\t" + "ldr r9, [%[a], #492]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #492]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+124] += m[124] * mu\n\t" + "ldr r7, [%[m], #496]\n\t" + "ldr r9, [%[a], #496]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #496]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+125] += m[125] * mu\n\t" + "ldr r7, [%[m], #500]\n\t" + "ldr r9, [%[a], #500]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r4, r7, #0\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #500]\n\t" + "adc r4, r4, #0\n\t" + "# a[i+126] += m[126] * mu\n\t" + "ldr r7, [%[m], #504]\n\t" + "ldr r9, [%[a], #504]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r9, r9, r6\n\t" + "adc r5, r7, #0\n\t" + "adds r9, r9, r4\n\t" + "str r9, [%[a], #504]\n\t" + "adc r5, r5, #0\n\t" + "# a[i+127] += m[127] * mu\n\t" + "ldr r7, [%[m], #508]\n\t" + "ldr r9, [%[a], #508]\n\t" + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r7, r7, %[ca]\n\t" + "mov %[ca], #0\n\t" + "adc %[ca], %[ca], %[ca]\n\t" + "adds r9, r9, r5\n\t" + "str r9, [%[a], #508]\n\t" + "ldr r9, [%[a], #512]\n\t" + "adcs r9, r9, r7\n\t" + "str r9, [%[a], #512]\n\t" + "adc %[ca], %[ca], #0\n\t" + "# i += 1\n\t" + "add %[a], %[a], #4\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #512\n\t" + "blt 1b\n\t" + "str r10, [%[a], #0]\n\t" + "str r14, [%[a], #4]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : 
"memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + ); + + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * Calls sp_4096_mul_128(r, a, b) and then reduces r in place with + * sp_4096_mont_reduce_128(r, m, mp). + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_128(r, a, b); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * Calls sp_4096_sqr_128(r, a) and then reduces r in place with + * sp_4096_mont_reduce_128(r, m, mp). + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_128(r, a); + sp_4096_mont_reduce_128(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r = 0; /* quotient accumulator; read-write asm operand, returned below */ + + __asm__ __volatile__ ( /* r5 = (div >> 1) + 1; r6/r7 are working copies of d0/d1 */ + "lsr r5, %[div], #1\n\t" + "add r5, r5, #1\n\t" + "mov r6, %[d0]\n\t" + "mov r7, %[d1]\n\t" + "# Do top 32\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" /* loop: shift one bit of r6 into r7, then compare against r5 and conditionally subtract via the r8 mask (no data-dependent branch) */ + "movs r6, r6, lsl #1\n\t" + "adc r7, r7, r7\n\t" + "subs r8, r5, r7\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], %[r]\n\t" + "sub %[r], %[r], r8\n\t" + "and r8, r8, r5\n\t" + "subs r7, r7, r8\n\t" + "subs r4, r4, #1\n\t" + "bpl 1b\n\t" + "add %[r], %[r], %[r]\n\t" + "add %[r], %[r], #1\n\t" /* two correction passes follow: multiply the estimate by div, subtract from (d1|d0), add the high word of the difference to r */ + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "umull r4, r5, %[r], %[div]\n\t" + "subs r4, %[d0], r4\n\t" + "sbc r5, %[d1], r5\n\t" + "add %[r], %[r], r5\n\t" + "subs r8, %[div], r4\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7", "r8" /* NOTE(review): the asm uses flag-setting instructions (subs, movs) but no cc clobber is listed - confirm this is acceptable for the target compiler */ + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * Processes exactly 128 digits; both preprocessor variants below compute + * the same result. + * + * r A single precision integer; receives the 128 masked digits. + * a A single precision integer; 128 digits are read. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<128; i++) { /* compact form: one digit per iteration */ + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 128; i += 8) { /* same operation written out eight digits per iteration */ + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. 
+ */ +static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "mov r6, #508\n\t" + "1:\n\t" + "ldr r4, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "subs r6, r6, #4\n\t" + "bcs 1b\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#else + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mov r3, #-1\n\t" + "ldr r4, [%[a], #508]\n\t" + "ldr r5, [%[b], #508]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #504]\n\t" + "ldr r5, [%[b], #504]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #500]\n\t" + "ldr r5, [%[b], #500]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #496]\n\t" + "ldr r5, [%[b], #496]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #492]\n\t" + "ldr r5, [%[b], #492]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #488]\n\t" + "ldr r5, [%[b], #488]\n\t" + "and r4, r4, 
r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #484]\n\t" + "ldr r5, [%[b], #484]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #480]\n\t" + "ldr r5, [%[b], #480]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #476]\n\t" + "ldr r5, [%[b], #476]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #472]\n\t" + "ldr r5, [%[b], #472]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #468]\n\t" + "ldr r5, [%[b], #468]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #464]\n\t" + "ldr r5, [%[b], #464]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #460]\n\t" + "ldr r5, [%[b], #460]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #456]\n\t" + "ldr r5, [%[b], #456]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + 
"movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #452]\n\t" + "ldr r5, [%[b], #452]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #448]\n\t" + "ldr r5, [%[b], #448]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #444]\n\t" + "ldr r5, [%[b], #444]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #440]\n\t" + "ldr r5, [%[b], #440]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #436]\n\t" + "ldr r5, [%[b], #436]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #432]\n\t" + "ldr r5, [%[b], #432]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #428]\n\t" + "ldr r5, [%[b], #428]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #424]\n\t" + "ldr r5, [%[b], #424]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it 
ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #420]\n\t" + "ldr r5, [%[b], #420]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #416]\n\t" + "ldr r5, [%[b], #416]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #412]\n\t" + "ldr r5, [%[b], #412]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #408]\n\t" + "ldr r5, [%[b], #408]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #404]\n\t" + "ldr r5, [%[b], #404]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #400]\n\t" + "ldr r5, [%[b], #400]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #396]\n\t" + "ldr r5, [%[b], #396]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #392]\n\t" + "ldr r5, [%[b], #392]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #388]\n\t" + "ldr r5, 
[%[b], #388]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #384]\n\t" + "ldr r5, [%[b], #384]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #380]\n\t" + "ldr r5, [%[b], #380]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #376]\n\t" + "ldr r5, [%[b], #376]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #372]\n\t" + "ldr r5, [%[b], #372]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #368]\n\t" + "ldr r5, [%[b], #368]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #364]\n\t" + "ldr r5, [%[b], #364]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #360]\n\t" + "ldr r5, [%[b], #360]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #356]\n\t" + "ldr r5, [%[b], #356]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs 
r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #352]\n\t" + "ldr r5, [%[b], #352]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #348]\n\t" + "ldr r5, [%[b], #348]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #344]\n\t" + "ldr r5, [%[b], #344]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #340]\n\t" + "ldr r5, [%[b], #340]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #336]\n\t" + "ldr r5, [%[b], #336]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #332]\n\t" + "ldr r5, [%[b], #332]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #328]\n\t" + "ldr r5, [%[b], #328]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #324]\n\t" + "ldr r5, [%[b], #324]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" 
+ "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #320]\n\t" + "ldr r5, [%[b], #320]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #316]\n\t" + "ldr r5, [%[b], #316]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #312]\n\t" + "ldr r5, [%[b], #312]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #308]\n\t" + "ldr r5, [%[b], #308]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #304]\n\t" + "ldr r5, [%[b], #304]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #300]\n\t" + "ldr r5, [%[b], #300]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #296]\n\t" + "ldr r5, [%[b], #296]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #292]\n\t" + "ldr r5, [%[b], #292]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, 
[%[a], #288]\n\t" + "ldr r5, [%[b], #288]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #284]\n\t" + "ldr r5, [%[b], #284]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #280]\n\t" + "ldr r5, [%[b], #280]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #276]\n\t" + "ldr r5, [%[b], #276]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #272]\n\t" + "ldr r5, [%[b], #272]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #268]\n\t" + "ldr r5, [%[b], #268]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #264]\n\t" + "ldr r5, [%[b], #264]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #260]\n\t" + "ldr r5, [%[b], #260]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #256]\n\t" + "ldr r5, [%[b], #256]\n\t" + "and r4, r4, r3\n\t" + 
"and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #252]\n\t" + "ldr r5, [%[b], #252]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[b], #248]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #244]\n\t" + "ldr r5, [%[b], #244]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[b], #240]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #236]\n\t" + "ldr r5, [%[b], #236]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[b], #232]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #228]\n\t" + "ldr r5, [%[b], #228]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[b], #224]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi 
%[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #220]\n\t" + "ldr r5, [%[b], #220]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[b], #216]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #212]\n\t" + "ldr r5, [%[b], #212]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[b], #208]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #204]\n\t" + "ldr r5, [%[b], #204]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[b], #200]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #196]\n\t" + "ldr r5, [%[b], #196]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[b], #192]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + 
"movne r3, r7\n\t" + "ldr r4, [%[a], #188]\n\t" + "ldr r5, [%[b], #188]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[b], #184]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #180]\n\t" + "ldr r5, [%[b], #180]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[b], #176]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #172]\n\t" + "ldr r5, [%[b], #172]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[b], #168]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #164]\n\t" + "ldr r5, [%[b], #164]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[b], #160]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #156]\n\t" + "ldr r5, [%[b], 
#156]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[b], #152]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #148]\n\t" + "ldr r5, [%[b], #148]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[b], #144]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #140]\n\t" + "ldr r5, [%[b], #140]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[b], #136]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #132]\n\t" + "ldr r5, [%[b], #132]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[b], #128]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, 
r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo 
%[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, 
[%[b], #56]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" 
+ "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "and r4, r4, r3\n\t" + "and r5, r5, r3\n\t" + "subs r4, r4, r5\n\t" + "it hi\n\t" + "movhi %[r], %[one]\n\t" + "it lo\n\t" + "movlo %[r], r3\n\t" + "it ne\n\t" + "movne r3, r7\n\t" + "eor %[r], %[r], r3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "r3", "r4", "r5", "r6", "r7" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128(a, m, NULL, r); +} + +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + if (t1[128 + i] != 0) { + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + if (t1[128 + i] != 0) + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n 
<<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + 
sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. 
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[256], md[128], rd[256]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + ah = a + 128; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 128; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e[0] == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 128); + err = sp_4096_mod_128_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=31; i>=0; i--) + if (e[0] >> i) + break; + + XMEMCPY(r, a, sizeof(sp_digit) * 128); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_128(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) + sp_4096_mont_mul_128(r, r, a, m, mp); + } + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128); + sp_4096_mont_reduce_128(r, m, mp); + + for (i = 127; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_128(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[128 * 2]; + sp_digit pd[64], qd[64], dpd[64]; + sp_digit tmpad[128], tmpbd[128]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + tmp = t; + r = tmp + 128; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; + tmp = a + 128; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_4096_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_4096_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_4096_sub_in_place_64(tmpa, tmpb); + sp_4096_mask_64(tmp, p, c); + sp_4096_add_64(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 64, qim); + sp_4096_mul_64(tmpa, tmpa, qi); + err = sp_4096_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
+ * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "ldr r3, [%[a], #508]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #504]\n\t" + "str r4, [%[r], #512]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #500]\n\t" + "str r3, [%[r], #508]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #496]\n\t" + "str r2, [%[r], #504]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #492]\n\t" + "str r4, [%[r], #500]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #488]\n\t" + "str r3, [%[r], #496]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #484]\n\t" + "str r2, [%[r], #492]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr 
r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #480]\n\t" + "str r4, [%[r], #488]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #476]\n\t" + "str r3, [%[r], #484]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #472]\n\t" + "str r2, [%[r], #480]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #468]\n\t" + "str r4, [%[r], #476]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #464]\n\t" + "str r3, [%[r], #472]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #460]\n\t" + "str r2, [%[r], #468]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #456]\n\t" + "str r4, [%[r], #464]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #452]\n\t" + "str r3, [%[r], #460]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #448]\n\t" + "str r2, [%[r], #456]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #444]\n\t" + "str r4, [%[r], #452]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #440]\n\t" + "str r3, [%[r], #448]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #436]\n\t" + "str r2, [%[r], #444]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #432]\n\t" + "str r4, [%[r], #440]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr 
r3, r3, r5\n\t" + "ldr r4, [%[a], #428]\n\t" + "str r3, [%[r], #436]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #424]\n\t" + "str r2, [%[r], #432]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #420]\n\t" + "str r4, [%[r], #428]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #416]\n\t" + "str r3, [%[r], #424]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #412]\n\t" + "str r2, [%[r], #420]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #408]\n\t" + "str r4, [%[r], #416]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #404]\n\t" + "str r3, [%[r], #412]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #400]\n\t" + "str r2, [%[r], #408]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #396]\n\t" + "str r4, [%[r], #404]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #392]\n\t" + "str r3, [%[r], #400]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #388]\n\t" + "str r2, [%[r], #396]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #384]\n\t" + "str r4, [%[r], #392]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #380]\n\t" + "str r3, [%[r], #388]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr 
r3, [%[a], #376]\n\t" + "str r2, [%[r], #384]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #372]\n\t" + "str r4, [%[r], #380]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #368]\n\t" + "str r3, [%[r], #376]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #364]\n\t" + "str r2, [%[r], #372]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #360]\n\t" + "str r4, [%[r], #368]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #356]\n\t" + "str r3, [%[r], #364]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #352]\n\t" + "str r2, [%[r], #360]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #348]\n\t" + "str r4, [%[r], #356]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #344]\n\t" + "str r3, [%[r], #352]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #340]\n\t" + "str r2, [%[r], #348]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #336]\n\t" + "str r4, [%[r], #344]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #332]\n\t" + "str r3, [%[r], #340]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #328]\n\t" + "str r2, [%[r], #336]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #324]\n\t" + 
"str r4, [%[r], #332]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #320]\n\t" + "str r3, [%[r], #328]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #316]\n\t" + "str r2, [%[r], #324]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #312]\n\t" + "str r4, [%[r], #320]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #308]\n\t" + "str r3, [%[r], #316]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #304]\n\t" + "str r2, [%[r], #312]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #300]\n\t" + "str r4, [%[r], #308]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #296]\n\t" + "str r3, [%[r], #304]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #292]\n\t" + "str r2, [%[r], #300]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #288]\n\t" + "str r4, [%[r], #296]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #284]\n\t" + "str r3, [%[r], #292]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #280]\n\t" + "str r2, [%[r], #288]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #276]\n\t" + "str r4, [%[r], #284]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #272]\n\t" + "str r3, [%[r], 
#280]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #268]\n\t" + "str r2, [%[r], #276]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #264]\n\t" + "str r4, [%[r], #272]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #260]\n\t" + "str r3, [%[r], #268]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #256]\n\t" + "str r2, [%[r], #264]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #252]\n\t" + "str r4, [%[r], #260]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #248]\n\t" + "str r3, [%[r], #256]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #244]\n\t" + "str r2, [%[r], #252]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #240]\n\t" + "str r4, [%[r], #248]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #236]\n\t" + "str r3, [%[r], #244]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #232]\n\t" + "str r2, [%[r], #240]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #228]\n\t" + "str r4, [%[r], #236]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #224]\n\t" + "str r3, [%[r], #232]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #220]\n\t" + "str r2, [%[r], #228]\n\t" + "lsr r5, 
r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #216]\n\t" + "str r4, [%[r], #224]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #212]\n\t" + "str r3, [%[r], #220]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #208]\n\t" + "str r2, [%[r], #216]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #204]\n\t" + "str r4, [%[r], #212]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #200]\n\t" + "str r3, [%[r], #208]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #196]\n\t" + "str r2, [%[r], #204]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #192]\n\t" + "str r4, [%[r], #200]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #188]\n\t" + "str r3, [%[r], #196]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #184]\n\t" + "str r2, [%[r], #192]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #180]\n\t" + "str r4, [%[r], #188]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #176]\n\t" + "str r3, [%[r], #184]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #172]\n\t" + "str r2, [%[r], #180]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #168]\n\t" + "str r4, [%[r], #176]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, 
r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #164]\n\t" + "str r3, [%[r], #172]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #160]\n\t" + "str r2, [%[r], #168]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #156]\n\t" + "str r4, [%[r], #164]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #152]\n\t" + "str r3, [%[r], #160]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #148]\n\t" + "str r2, [%[r], #156]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #144]\n\t" + "str r4, [%[r], #152]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #140]\n\t" + "str r3, [%[r], #148]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #136]\n\t" + "str r2, [%[r], #144]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #132]\n\t" + "str r4, [%[r], #140]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #128]\n\t" + "str r3, [%[r], #136]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #124]\n\t" + "str r2, [%[r], #132]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #120]\n\t" + "str r4, [%[r], #128]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #116]\n\t" + "str r3, [%[r], #124]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr 
r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #112]\n\t" + "str r2, [%[r], #120]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #108]\n\t" + "str r4, [%[r], #116]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #104]\n\t" + "str r3, [%[r], #112]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #100]\n\t" + "str r2, [%[r], #108]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #96]\n\t" + "str r4, [%[r], #104]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #92]\n\t" + "str r3, [%[r], #100]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #88]\n\t" + "str r2, [%[r], #96]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #84]\n\t" + "str r4, [%[r], #92]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #80]\n\t" + "str r3, [%[r], #88]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #76]\n\t" + "str r2, [%[r], #84]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #72]\n\t" + "str r4, [%[r], #80]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #68]\n\t" + "str r3, [%[r], #76]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #64]\n\t" + "str r2, [%[r], #72]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" 
+ "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], 
#16]\n\t"
+        "lsr r5, r4, #1\n\t"
+        "lsl r4, r4, %[n]\n\t"
+        "lsr r5, r5, r6\n\t"
+        "orr r2, r2, r5\n\t"
+        "ldr r3, [%[a], #4]\n\t"
+        "str r2, [%[r], #12]\n\t"
+        "lsr r5, r3, #1\n\t"
+        "lsl r3, r3, %[n]\n\t"
+        "lsr r5, r5, r6\n\t"
+        "orr r4, r4, r5\n\t"
+        "ldr r2, [%[a], #0]\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "lsr r5, r2, #1\n\t"
+        "lsl r2, r2, %[n]\n\t"
+        "lsr r5, r5, r6\n\t"
+        "orr r3, r3, r5\n\t"
+        "str r2, [%[r]]\n\t"
+        "str r3, [%[r], #4]\n\t"
+        :
+        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
+        : "memory", "r2", "r3", "r4", "r5", "r6"
+    );
+}
+
+/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
+ * The exponent is read in 5-bit windows, top bits first: five squarings, then a left shift by the window value.
+ * r     A single precision number that is the result of the operation. Digits up to r[255] are written.
+ * e     A single precision number that is the exponent.
+ * bits  The number of bits in the exponent.
+ * m     A single precision number that is the modulus.
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */
+static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
+        const sp_digit* m)
+{
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit nd[256]; /* backs norm */
+    sp_digit td[129]; /* backs tmp */
+#else
+    sp_digit* td;
+#endif
+    sp_digit* norm;
+    sp_digit* tmp;
+    sp_digit mp = 1;
+    sp_digit n, o; /* n: exponent word being consumed; o: value returned by sp_4096_add_128 */
+    sp_digit mask;
+    int i;
+    int c, y; /* c: unread bits left in n; y: current window value */
+    int err = MP_OKAY;
+
+#ifdef WOLFSSL_SMALL_STACK
+    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
+                            DYNAMIC_TYPE_TMP_BUFFER); /* 256 digits for norm + 129 for tmp */
+    if (td == NULL) {
+        err = MEMORY_E;
+    }
+#endif
+
+    if (err == MP_OKAY) {
+#ifdef WOLFSSL_SMALL_STACK
+        norm = td;
+        tmp = td + 256;
+#else
+        norm = nd;
+        tmp = td;
+#endif
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_128(norm, m);
+
+        i = (bits - 1) / 32; /* index of the top exponent word */
+        n = e[i--];
+        c = bits & 31; /* exponent bits held in that top word */
+        if (c == 0) {
+            c = 32; /* top word is full */
+        }
+        c -= bits % 5; /* first window takes the bits % 5 leading bits. NOTE(review): c goes negative if (bits & 31) < bits % 5; the caller in this chunk passes a multiple of 8, which avoids that - confirm for other callers */
+        if (c == 32) { /* would need n >> 32: take a full 5-bit window instead */
+            c = 27;
+        }
+        y = (int)(n >> c);
+        n <<= 32 - c; /* left-align the unread bits */
+        sp_4096_lshift_128(r, norm, y); /* r = norm shifted left by y bits */
+        for (; i>=0 || c>=5; ) { /* one 5-bit window per pass */
+            if (c == 0) { /* n is used up: window is the top 5 bits of the next word */
+                n = e[i--];
+                y = n >> 27;
+                n <<= 5;
+                c = 27;
+            }
+            else if (c < 5) { /* window straddles n and the next word */
+                y = n >> 27;
+                n = e[i--];
+                c = 5 - c;
+                y |= n >> (32 - c);
+                n <<= c;
+                c = 32 - c;
+            }
+            else { /* window lies entirely within n */
+                y = (n >> 27) & 0x1f;
+                n <<= 5;
+                c -= 5;
+            }
+
+            sp_4096_mont_sqr_128(r, r, m, mp); /* five squarings, one per window bit */
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+            sp_4096_mont_sqr_128(r, r, m, mp);
+
+            sp_4096_lshift_128(r, r, y); /* shift r left by the window value */
+            sp_4096_mul_d_128(tmp, norm, r[128]); /* presumably folds the shifted-out digit r[128] back in via norm - confirm */
+            r[128] = 0;
+            o = sp_4096_add_128(r, r, tmp);
+            sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); /* mask is 0 - o */
+        }
+
+        XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); /* clear the upper 128 digits before the reduction */
+        sp_4096_mont_reduce_128(r, m, mp);
+
+        mask = 0 - (sp_4096_cmp_128(r, m) >= 0); /* all ones when the compare result is >= 0 */
+        sp_4096_cond_sub_128(r, r, m, mask);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (td != NULL) {
+        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    }
+#endif
+
+    return err;
+}
+#endif /* HAVE_FFDHE_4096 */
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ * Uses sp_4096_mod_exp_2_128 when HAVE_FFDHE_4096 is defined, base is 2 and the top digit of mod is all ones.
+ * base    Base. At most 4096 bits.
+ * exp     Array of bytes that is the exponent.
+ * expLen  Length of data, in bytes, in exponent. At most 512.
+ * mod     Modulus. Must be exactly 4096 bits.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Length, in bytes, of exponentiation result (leading zero bytes removed).
+ * returns 0 on success, MP_READ_E if base, exp or mod fails its size check
+ *         and MEMORY_E if memory allocation fails.
+ */
+int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
+    mp_int* mod, byte* out, word32* outLen)
+{
+    int err = MP_OKAY;
+    sp_digit b[256], e[128], m[128]; /* b also receives the result through r */
+    sp_digit* r = b;
+    word32 i;
+
+    if (mp_count_bits(base) > 4096) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (expLen > 512) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(mod) != 4096) {
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(b, 128, base);
+        sp_4096_from_bin(e, 128, exp, expLen);
+        sp_4096_from_mp(m, 128, mod);
+
+    #ifdef HAVE_FFDHE_4096
+        if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1) /* base is 2 and top digit of m is all ones */
+            err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
+        else
+    #endif
+            err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
+
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+        for (i=0; i<512 && out[i] == 0; i++) { /* count leading zero bytes */
+        }
+        *outLen -= i;
+        XMEMMOVE(out, out + i, *outLen); /* slide the result to the front of out */
+
+    }
+
+    XMEMSET(e, 0, sizeof(e)); /* zero the local exponent copy. NOTE(review): if XMEMSET is plain memset the compiler may drop this store - confirm */
+
+    return err;
+}
+#endif /* WOLFSSL_HAVE_SP_DH */
+
+#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
+
+#endif /* WOLFSSL_SP_4096 */
+
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 #ifdef WOLFSSL_HAVE_SP_ECC
 #ifndef WOLFSSL_SP_NO_256
diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c
index 672580068..83b4ac7ff 100644
--- a/wolfcrypt/src/sp_arm64.c
+++ b/wolfcrypt/src/sp_arm64.c
@@ -1150,7 +1150,7 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
     u += sp_2048_add_16(r + 8, r + 8, z1);
     r[24] = u;
     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
-    sp_2048_add_16(r + 16, r + 16, z2);
+    (void)sp_2048_add_16(r + 16, r + 16, z2);
 }
 
 /* Square a and put result in r.
(r = a * a) @@ -1177,7 +1177,7 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Sub b from a into a. (a -= b) @@ -1440,7 +1440,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Square a and put result in r. (r = a * a) @@ -1467,7 +1467,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -7746,7 +7746,7 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Square a and put result in r. (r = a * a) @@ -7773,7 +7773,7 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Sub b from a into a. (a -= b) @@ -8116,7 +8116,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Square a and put result in r. 
(r = a * a)
@@ -8143,7 +8143,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
     u += sp_3072_add_48(r + 24, r + 24, z1);
     r[72] = u;
     XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
-    sp_3072_add_48(r + 48, r + 48, z2);
+    (void)sp_3072_add_48(r + 48, r + 48, z2);
 }
 
 #endif /* !WOLFSSL_SP_SMALL */
@@ -13058,6 +13058,17441 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
 
 #endif /* !WOLFSSL_SP_NO_3072 */
 
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ * Bytes are packed into 64-bit words of r, last byte of a first.
+ * r     A single precision integer.
+ * size  Number of digits in r; digits above the converted value are zeroed.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0; /* bit position in r[j] for the next byte */
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) { /* last byte of a is least significant */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 56U) { /* this byte reaches the top of the 64-bit word */
+            r[j] &= 0xffffffffffffffffl;
+            s = 64U - s;
+            if (j + 1 >= size) { /* r is full: remaining bytes are not read */
+                break;
+            }
+            r[++j] = (sp_digit)a[i] >> s; /* bits of the byte that did not fit */
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    for (j++; j < size; j++) { /* zero the unused high digits */
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ * The branch taken depends on how DIGIT_BIT compares with 64.
+ * r     A single precision integer.
+ * size  Number of digits in r; digits above the converted value are zeroed.
+ * a     A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 64
+    int j;
+
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); /* same digit width: copy as-is. NOTE(review): a->used is not checked against size here - confirm callers bound it */
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 64
+    int i, j = 0; /* wider mp digits: each a->dp[i] is spread over several words of r */
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffffffffffffl;
+        s = 64U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        while ((s + 64U) <= (word32)DIGIT_BIT) { /* more whole 64-bit pieces remain in this digit */
+            s += 64U;
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    int i, j = 0, s = 0; /* narrower mp digits: several a->dp[i] are packed into each word of r */
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 64) { /* this digit reaches the top of the 64-bit word */
+            r[j] &= 0xffffffffffffffffl;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 64 - s;
+            if (s == DIGIT_BIT) { /* digit ended exactly on the word boundary */
+                r[++j] = 0;
+                s = 0;
+            }
+            else { /* carry the digit's remaining high bits into the next word */
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<64 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/ + if (j < 0) { + break; + } + while (b < 64) { + a[j--] = r[i] >> b; b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 64); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adds x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 
128]\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "cset %[c], cs\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +/* Sub b from a into a. (a -= b) + * + * a A single precision integer and result. + * b A single precision integer. 
+ */ +static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldp x2, x3, [%[a], 0]\n\t" + "ldp x4, x5, [%[a], 16]\n\t" + "ldp x6, x7, [%[b], 0]\n\t" + "ldp x8, x9, [%[b], 16]\n\t" + "subs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 0]\n\t" + "stp x4, x5, [%[a], 16]\n\t" + "ldp x2, x3, [%[a], 32]\n\t" + "ldp x4, x5, [%[a], 48]\n\t" + "ldp x6, x7, [%[b], 32]\n\t" + "ldp x8, x9, [%[b], 48]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 32]\n\t" + "stp x4, x5, [%[a], 48]\n\t" + "ldp x2, x3, [%[a], 64]\n\t" + "ldp x4, x5, [%[a], 80]\n\t" + "ldp x6, x7, [%[b], 64]\n\t" + "ldp x8, x9, [%[b], 80]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 64]\n\t" + "stp x4, x5, [%[a], 80]\n\t" + "ldp x2, x3, [%[a], 96]\n\t" + "ldp x4, x5, [%[a], 112]\n\t" + "ldp x6, x7, [%[b], 96]\n\t" + "ldp x8, x9, [%[b], 112]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 96]\n\t" + "stp x4, x5, [%[a], 112]\n\t" + "ldp x2, x3, [%[a], 128]\n\t" + "ldp x4, x5, [%[a], 144]\n\t" + "ldp x6, x7, [%[b], 128]\n\t" + "ldp x8, x9, [%[b], 144]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 128]\n\t" + "stp x4, x5, [%[a], 144]\n\t" + "ldp x2, x3, [%[a], 160]\n\t" + "ldp x4, x5, [%[a], 176]\n\t" + "ldp x6, x7, [%[b], 160]\n\t" + "ldp x8, x9, [%[b], 176]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 160]\n\t" + "stp x4, x5, [%[a], 176]\n\t" + "ldp x2, x3, [%[a], 192]\n\t" + "ldp x4, x5, [%[a], 208]\n\t" + "ldp x6, x7, [%[b], 192]\n\t" + "ldp x8, x9, [%[b], 208]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, 
x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 192]\n\t" + "stp x4, x5, [%[a], 208]\n\t" + "ldp x2, x3, [%[a], 224]\n\t" + "ldp x4, x5, [%[a], 240]\n\t" + "ldp x6, x7, [%[b], 224]\n\t" + "ldp x8, x9, [%[b], 240]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 224]\n\t" + "stp x4, x5, [%[a], 240]\n\t" + "ldp x2, x3, [%[a], 256]\n\t" + "ldp x4, x5, [%[a], 272]\n\t" + "ldp x6, x7, [%[b], 256]\n\t" + "ldp x8, x9, [%[b], 272]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 256]\n\t" + "stp x4, x5, [%[a], 272]\n\t" + "ldp x2, x3, [%[a], 288]\n\t" + "ldp x4, x5, [%[a], 304]\n\t" + "ldp x6, x7, [%[b], 288]\n\t" + "ldp x8, x9, [%[b], 304]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 288]\n\t" + "stp x4, x5, [%[a], 304]\n\t" + "ldp x2, x3, [%[a], 320]\n\t" + "ldp x4, x5, [%[a], 336]\n\t" + "ldp x6, x7, [%[b], 320]\n\t" + "ldp x8, x9, [%[b], 336]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 320]\n\t" + "stp x4, x5, [%[a], 336]\n\t" + "ldp x2, x3, [%[a], 352]\n\t" + "ldp x4, x5, [%[a], 368]\n\t" + "ldp x6, x7, [%[b], 352]\n\t" + "ldp x8, x9, [%[b], 368]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 352]\n\t" + "stp x4, x5, [%[a], 368]\n\t" + "ldp x2, x3, [%[a], 384]\n\t" + "ldp x4, x5, [%[a], 400]\n\t" + "ldp x6, x7, [%[b], 384]\n\t" + "ldp x8, x9, [%[b], 400]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 384]\n\t" + "stp x4, x5, [%[a], 400]\n\t" + "ldp x2, x3, [%[a], 416]\n\t" + "ldp x4, x5, [%[a], 432]\n\t" + "ldp x6, x7, [%[b], 416]\n\t" + "ldp x8, x9, [%[b], 432]\n\t" + "sbcs x2, x2, x6\n\t" 
+ "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 416]\n\t" + "stp x4, x5, [%[a], 432]\n\t" + "ldp x2, x3, [%[a], 448]\n\t" + "ldp x4, x5, [%[a], 464]\n\t" + "ldp x6, x7, [%[b], 448]\n\t" + "ldp x8, x9, [%[b], 464]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 448]\n\t" + "stp x4, x5, [%[a], 464]\n\t" + "ldp x2, x3, [%[a], 480]\n\t" + "ldp x4, x5, [%[a], 496]\n\t" + "ldp x6, x7, [%[b], 480]\n\t" + "ldp x8, x9, [%[b], 496]\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a], 480]\n\t" + "stp x4, x5, [%[a], 496]\n\t" + "csetm %[c], cc\n\t" + : [c] "+r" (c) + : [a] "r" (a), [b] "r" (b) + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "ldp x7, x8, [%[b], 0]\n\t" + "ldp x9, x10, [%[b], 16]\n\t" + "adds x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 32]\n\t" + "ldp x5, x6, [%[a], 48]\n\t" + "ldp x7, x8, [%[b], 32]\n\t" + "ldp x9, x10, [%[b], 48]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 32]\n\t" + "stp x5, x6, [%[r], 48]\n\t" + "ldp x3, x4, [%[a], 64]\n\t" + "ldp x5, x6, [%[a], 80]\n\t" + "ldp x7, x8, [%[b], 64]\n\t" + "ldp x9, x10, [%[b], 80]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 64]\n\t" + "stp x5, x6, [%[r], 80]\n\t" + "ldp x3, x4, [%[a], 96]\n\t" + "ldp x5, x6, [%[a], 112]\n\t" + "ldp x7, x8, [%[b], 96]\n\t" + "ldp x9, x10, [%[b], 112]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 96]\n\t" + "stp x5, x6, [%[r], 112]\n\t" + "ldp x3, x4, [%[a], 128]\n\t" + "ldp x5, x6, [%[a], 144]\n\t" + "ldp x7, x8, [%[b], 128]\n\t" + "ldp x9, x10, [%[b], 144]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 128]\n\t" + "stp x5, x6, [%[r], 144]\n\t" + "ldp x3, x4, [%[a], 160]\n\t" + "ldp x5, x6, [%[a], 176]\n\t" + "ldp x7, x8, [%[b], 160]\n\t" + "ldp x9, x10, [%[b], 176]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 160]\n\t" + "stp x5, x6, [%[r], 176]\n\t" + "ldp x3, x4, [%[a], 192]\n\t" + "ldp x5, x6, [%[a], 208]\n\t" + "ldp x7, x8, [%[b], 192]\n\t" + "ldp x9, x10, [%[b], 208]\n\t" + "adcs x3, x3, 
x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 192]\n\t" + "stp x5, x6, [%[r], 208]\n\t" + "ldp x3, x4, [%[a], 224]\n\t" + "ldp x5, x6, [%[a], 240]\n\t" + "ldp x7, x8, [%[b], 224]\n\t" + "ldp x9, x10, [%[b], 240]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 224]\n\t" + "stp x5, x6, [%[r], 240]\n\t" + "ldp x3, x4, [%[a], 256]\n\t" + "ldp x5, x6, [%[a], 272]\n\t" + "ldp x7, x8, [%[b], 256]\n\t" + "ldp x9, x10, [%[b], 272]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 256]\n\t" + "stp x5, x6, [%[r], 272]\n\t" + "ldp x3, x4, [%[a], 288]\n\t" + "ldp x5, x6, [%[a], 304]\n\t" + "ldp x7, x8, [%[b], 288]\n\t" + "ldp x9, x10, [%[b], 304]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 288]\n\t" + "stp x5, x6, [%[r], 304]\n\t" + "ldp x3, x4, [%[a], 320]\n\t" + "ldp x5, x6, [%[a], 336]\n\t" + "ldp x7, x8, [%[b], 320]\n\t" + "ldp x9, x10, [%[b], 336]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 320]\n\t" + "stp x5, x6, [%[r], 336]\n\t" + "ldp x3, x4, [%[a], 352]\n\t" + "ldp x5, x6, [%[a], 368]\n\t" + "ldp x7, x8, [%[b], 352]\n\t" + "ldp x9, x10, [%[b], 368]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 352]\n\t" + "stp x5, x6, [%[r], 368]\n\t" + "ldp x3, x4, [%[a], 384]\n\t" + "ldp x5, x6, [%[a], 400]\n\t" + "ldp x7, x8, [%[b], 384]\n\t" + "ldp x9, x10, [%[b], 400]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 384]\n\t" + "stp x5, x6, [%[r], 400]\n\t" + "ldp x3, x4, [%[a], 416]\n\t" + "ldp x5, x6, [%[a], 432]\n\t" + "ldp x7, x8, [%[b], 416]\n\t" + "ldp x9, x10, 
[%[b], 432]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 416]\n\t" + "stp x5, x6, [%[r], 432]\n\t" + "ldp x3, x4, [%[a], 448]\n\t" + "ldp x5, x6, [%[a], 464]\n\t" + "ldp x7, x8, [%[b], 448]\n\t" + "ldp x9, x10, [%[b], 464]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 448]\n\t" + "stp x5, x6, [%[r], 464]\n\t" + "ldp x3, x4, [%[a], 480]\n\t" + "ldp x5, x6, [%[a], 496]\n\t" + "ldp x7, x8, [%[b], 480]\n\t" + "ldp x9, x10, [%[b], 496]\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r], 480]\n\t" + "stp x5, x6, [%[r], 496]\n\t" + "cset %[c], cs\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static void sp_4096_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[32]; + + __asm__ __volatile__ ( + "# A[0] * B[0]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x3, x7, x8\n\t" + "umulh x4, x7, x8\n\t" + "mov x5, 0\n\t" + "str x3, [%[tmp]]\n\t" + "# A[0] * B[1]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[0]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 8]\n\t" + "# A[0] * B[2]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[1]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[0]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 16]\n\t" + "# A[0] * B[3]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[2]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[1]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[0]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul 
x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 24]\n\t" + "# A[0] * B[4]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[3]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[2]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[1]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[0]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 32]\n\t" + "# A[0] * B[5]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[4]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[3]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[2]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[1]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 
8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[0]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 40]\n\t" + "# A[0] * B[6]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[5]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[4]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[3]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[2]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[1]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[0]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 48]\n\t" + "# A[0] * B[7]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[6]\n\t" + "ldr x7, [%[a], 8]\n\t" + 
"ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[5]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[4]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[3]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[2]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[1]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[0]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 56]\n\t" + "# A[0] * B[8]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[7]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[6]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[5]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, 
[%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[4]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[3]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[2]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[1]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[0]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 64]\n\t" + "# A[0] * B[9]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[8]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[7]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[6]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[5]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 
40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[4]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[3]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[2]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[1]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[0]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 72]\n\t" + "# A[0] * B[10]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[9]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[8]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[7]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[6]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 48]\n\t" + 
"mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[5]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[4]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[3]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[2]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[1]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[0]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 80]\n\t" + "# A[0] * B[11]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[10]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[9]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[8]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, 
x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[7]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[6]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[5]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[4]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[3]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[2]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[1]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[0]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 88]\n\t" + "# A[0] * B[12]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[11]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, 
x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[10]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[9]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[8]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[7]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[6]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[5]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[4]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[3]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[2]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[1]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + 
"adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[0]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 96]\n\t" + "# A[0] * B[13]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[12]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[11]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[10]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[9]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[8]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[7]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[6]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[5]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[4]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[3]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[2]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[1]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[0]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 104]\n\t" + "# A[0] * B[14]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[13]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[12]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[11]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[10]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[9]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[8]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[7]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[6]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[5]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[4]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[3]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[2]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[1]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[0]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, 
x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 112]\n\t" + "# A[0] * B[15]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[14]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[13]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[12]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[11]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[10]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[9]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[8]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[7]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[6]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, 
x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[5]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[4]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[3]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[2]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[1]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[0]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 120]\n\t" + "# A[0] * B[16]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[15]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[14]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[13]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, 
x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[12]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[11]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[10]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[9]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[8]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[7]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[6]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[5]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[4]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[3]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, 
xzr\n\t" + "# A[14] * B[2]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[1]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[0]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 128]\n\t" + "# A[0] * B[17]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[16]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[15]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[14]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[13]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[12]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[11]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc 
x4, x4, xzr\n\t" + "# A[7] * B[10]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[9]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[8]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[7]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[6]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[5]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[4]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[3]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[2]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[1]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] 
* B[0]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 136]\n\t" + "# A[0] * B[18]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[17]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[16]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[15]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[14]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[13]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[12]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[11]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[10]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# 
A[9] * B[9]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[8]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[7]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[6]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[5]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[4]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[3]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[2]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[1]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[0]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 144]\n\t" + 
"# A[0] * B[19]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[18]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[17]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[16]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[15]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[14]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[13]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[12]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[11]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[10]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[9]\n\t" + 
"ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[8]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[7]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[6]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[5]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[4]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[3]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[2]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[1]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[0]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 152]\n\t" + "# A[0] * 
B[20]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[19]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[18]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[17]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[16]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[15]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[14]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[13]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[12]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[11]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[10]\n\t" + "ldr x7, 
[%[a], 80]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[9]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[8]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[7]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[6]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[5]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[4]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[3]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[2]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[1]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[0]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr 
x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 160]\n\t" + "# A[0] * B[21]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[20]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[19]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[18]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[17]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[16]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[15]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[14]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[13]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[12]\n\t" + "ldr x7, [%[a], 72]\n\t" 
+ "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[11]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[10]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[9]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[8]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[7]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[6]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[5]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[4]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[3]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[2]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 
16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[1]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[0]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 168]\n\t" + "# A[0] * B[22]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[21]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[20]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[19]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[18]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[17]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[16]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[15]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, 
[%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[14]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[13]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[12]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[11]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[10]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[9]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[8]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[7]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[6]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[5]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 40]\n\t" + 
"mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[4]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[3]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[2]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[1]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[0]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 176]\n\t" + "# A[0] * B[23]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[22]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[21]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[20]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[19]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 
152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[18]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[17]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[16]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[15]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[14]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[13]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[12]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[11]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[10]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[9]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul 
x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[8]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[7]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[6]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[5]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[4]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[3]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[2]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[1]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[0]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 184]\n\t" + "# A[0] * B[24]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 192]\n\t" 
+ "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[23]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[22]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[21]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[20]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[19]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[18]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[17]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[16]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[15]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[14]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, 
x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[13]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[12]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[11]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[10]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[9]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[8]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[7]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[6]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[5]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[4]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh 
x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[3]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[2]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[1]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[0]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 192]\n\t" + "# A[0] * B[25]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[24]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[23]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[22]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[21]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[20]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + 
"umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[19]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[18]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[17]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[16]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[15]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[14]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[13]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[12]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[11]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[10]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, 
x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[9]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[8]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[7]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[6]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[5]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[4]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[3]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[2]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[1]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[0]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 200]\n\t" + "# A[0] * B[26]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[25]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[24]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[23]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[22]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[21]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[20]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[19]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[18]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[17]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, 
x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[16]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[15]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[14]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[13]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[12]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[11]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[10]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[9]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[8]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[7]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + 
"adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[6]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[5]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[4]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[3]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[2]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[1]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[0]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 208]\n\t" + "# A[0] * B[27]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[26]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[25]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, 
x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[24]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[23]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[22]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[21]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[20]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[19]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[18]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[17]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[16]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[15]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[14]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[13]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[12]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[11]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[10]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[9]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[8]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[7]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[6]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[5]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, 
x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[4]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[3]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[2]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[1]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[0]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 216]\n\t" + "# A[0] * B[28]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[27]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[26]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[25]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[24]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[23]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[22]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[21]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[20]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[19]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[18]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[17]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[16]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[15]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[14]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, 
x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[13]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[12]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[11]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[10]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[9]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[8]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[7]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[6]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[5]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[4]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + 
"adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[3]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[2]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[1]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[0]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 224]\n\t" + "# A[0] * B[29]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[1] * B[28]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[2] * B[27]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[26]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[4] * B[25]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[24]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, 
x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[23]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[22]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[21]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[20]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[19]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[18]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[17]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[16]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[15]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[14]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + 
"adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[13]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[12]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[11]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[10]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[9]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[8]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[7]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[6]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[5]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[4]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, 
x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[3]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[2]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[1]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[0]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[tmp], 232]\n\t" + "# A[0] * B[30]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[1] * B[29]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[2] * B[28]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[3] * B[27]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[4] * B[26]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[25]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs 
x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[24]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[23]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[22]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[21]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[20]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[19]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[18]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[17]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[16]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[15]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, 
x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[14]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[13]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[12]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[11]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[10]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[9]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[8]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[7]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[6]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[5]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + 
"adc x5, x5, xzr\n\t" + "# A[26] * B[4]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[3]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[2]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[1]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[0]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[tmp], 240]\n\t" + "# A[0] * B[31]\n\t" + "ldr x7, [%[a], 0]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[1] * B[30]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[2] * B[29]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[3] * B[28]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[4] * B[27]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, 
x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[5] * B[26]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[25]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[24]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[23]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[22]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[21]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[20]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[19]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[18]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[17]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + 
"adc x3, x3, xzr\n\t" + "# A[15] * B[16]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[15]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[14]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[13]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[12]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[11]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[10]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[9]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[8]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[7]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, 
x3, xzr\n\t" + "# A[25] * B[6]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[5]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[4]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[3]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[2]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[1]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[0]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 0]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 248]\n\t" + "# A[1] * B[31]\n\t" + "ldr x7, [%[a], 8]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[2] * B[30]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[3] * B[29]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + 
"adc x4, x4, xzr\n\t" + "# A[4] * B[28]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[5] * B[27]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[26]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[7] * B[25]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[24]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[23]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[22]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[21]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[20]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[19]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, 
xzr\n\t" + "# A[14] * B[18]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[17]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[16]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[15]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[14]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[13]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[12]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[11]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[10]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[9]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, 
xzr\n\t" + "# A[24] * B[8]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[7]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[6]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[5]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[4]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[3]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[2]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[1]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 8]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 256]\n\t" + "# A[2] * B[31]\n\t" + "ldr x7, [%[a], 16]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[3] * B[30]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc 
x5, x5, xzr\n\t" + "# A[4] * B[29]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[5] * B[28]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[6] * B[27]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[26]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[25]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[24]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[23]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[22]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[21]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[20]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, 
xzr\n\t" + "# A[14] * B[19]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[18]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[17]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[16]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[15]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[14]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[13]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[12]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[11]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[10]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, 
xzr\n\t" + "# A[24] * B[9]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[8]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[7]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[6]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[5]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[4]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[3]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[2]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 16]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 264]\n\t" + "# A[3] * B[31]\n\t" + "ldr x7, [%[a], 24]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[4] * B[30]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + 
"adc x3, x3, xzr\n\t" + "# A[5] * B[29]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[6] * B[28]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[7] * B[27]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[26]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[25]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[24]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[23]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[22]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[21]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[20]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, 
xzr\n\t" + "# A[15] * B[19]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[18]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[17]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[16]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[15]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[14]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[13]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[12]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[11]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[10]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, 
xzr\n\t" + "# A[25] * B[9]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[8]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[7]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[6]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[5]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[4]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[3]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 24]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 272]\n\t" + "# A[4] * B[31]\n\t" + "ldr x7, [%[a], 32]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[5] * B[30]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[6] * B[29]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + 
"adc x4, x4, xzr\n\t" + "# A[7] * B[28]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[8] * B[27]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[26]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[25]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[24]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[23]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[22]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[21]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[20]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[19]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, 
x4, xzr\n\t" + "# A[17] * B[18]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[17]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[16]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[15]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[14]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[13]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[12]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[11]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[10]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[9]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, 
xzr\n\t" + "# A[27] * B[8]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[7]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[6]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[5]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[4]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 32]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 280]\n\t" + "# A[5] * B[31]\n\t" + "ldr x7, [%[a], 40]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[6] * B[30]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[7] * B[29]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[8] * B[28]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[9] * B[27]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + 
"adc x5, x5, xzr\n\t" + "# A[10] * B[26]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[25]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[24]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[23]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[22]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[21]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[20]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[19]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[18]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[17]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + 
"adc x5, x5, xzr\n\t" + "# A[20] * B[16]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[15]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[14]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[13]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[12]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[11]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[10]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[9]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[8]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[7]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, 
x5, xzr\n\t" + "# A[30] * B[6]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[5]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 40]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 288]\n\t" + "# A[6] * B[31]\n\t" + "ldr x7, [%[a], 48]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[7] * B[30]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[8] * B[29]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[9] * B[28]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[10] * B[27]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[26]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[25]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[24]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, 
x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[23]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[22]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[21]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[20]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[19]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[18]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[17]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[16]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[15]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[14]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, 
x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[13]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[12]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[11]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[10]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[9]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[8]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[7]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[6]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 48]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 296]\n\t" + "# A[7] * B[31]\n\t" + "ldr x7, [%[a], 56]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[8] * B[30]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + 
"adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[9] * B[29]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[10] * B[28]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[11] * B[27]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[26]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[25]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[24]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[23]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[22]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[21]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[20]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + 
"adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[19]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[18]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[17]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[16]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[15]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[14]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[13]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[12]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[11]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[10]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + 
"adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[9]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[8]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[7]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 56]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 304]\n\t" + "# A[8] * B[31]\n\t" + "ldr x7, [%[a], 64]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[9] * B[30]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[10] * B[29]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[11] * B[28]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[12] * B[27]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[26]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[25]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[24]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[23]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[22]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[21]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[20]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[19]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[18]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[17]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[16]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[15]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[14]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[13]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[12]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[11]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[10]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[9]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[8]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 64]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 312]\n\t" + "# A[9] * B[31]\n\t" + "ldr x7, [%[a], 72]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[10] * B[30]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[11] * B[29]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, 
x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[12] * B[28]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[13] * B[27]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[26]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[25]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[24]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[23]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[22]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[21]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[20]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[19]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, 
x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[18]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[17]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[16]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[15]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[14]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[13]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[12]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[11]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[10]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[9]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 72]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, 
x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 320]\n\t" + "# A[10] * B[31]\n\t" + "ldr x7, [%[a], 80]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[11] * B[30]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[12] * B[29]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[13] * B[28]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[14] * B[27]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[26]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[25]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[24]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[23]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[22]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, 
x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[21]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[20]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[19]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[18]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[17]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[16]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[15]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[14]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[13]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[12]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, 
x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[11]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[10]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 80]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 328]\n\t" + "# A[11] * B[31]\n\t" + "ldr x7, [%[a], 88]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[12] * B[30]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[13] * B[29]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[14] * B[28]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[15] * B[27]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[26]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[25]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[24]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, 
[%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[23]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[22]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[21]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[20]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[19]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[18]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[17]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[16]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[15]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[14]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, 
[%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[13]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[12]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[11]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 88]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 336]\n\t" + "# A[12] * B[31]\n\t" + "ldr x7, [%[a], 96]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[13] * B[30]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[14] * B[29]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[15] * B[28]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[16] * B[27]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[26]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[25]\n\t" + "ldr x7, 
[%[a], 144]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[24]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[23]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[22]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[21]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[20]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[19]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[18]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[17]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[16]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[15]\n\t" + "ldr x7, 
[%[a], 224]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[14]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[13]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[12]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 96]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 344]\n\t" + "# A[13] * B[31]\n\t" + "ldr x7, [%[a], 104]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[14] * B[30]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[15] * B[29]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[16] * B[28]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[17] * B[27]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[26]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# 
A[19] * B[25]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[24]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[23]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[22]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[21]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[20]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[19]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[18]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[17]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[16]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# 
A[29] * B[15]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[14]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[13]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 104]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 352]\n\t" + "# A[14] * B[31]\n\t" + "ldr x7, [%[a], 112]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[15] * B[30]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[16] * B[29]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[17] * B[28]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[18] * B[27]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[26]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[25]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" 
+ "adc x5, x5, xzr\n\t" + "# A[21] * B[24]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[23]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[22]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[21]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[20]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[19]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[18]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[17]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[16]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[15]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" 
+ "adc x5, x5, xzr\n\t" + "# A[31] * B[14]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 112]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 360]\n\t" + "# A[15] * B[31]\n\t" + "ldr x7, [%[a], 120]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[16] * B[30]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[17] * B[29]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[18] * B[28]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[19] * B[27]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[26]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[25]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[24]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[23]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, 
x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[22]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[21]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[20]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[19]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[18]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[17]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[16]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[15]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 120]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 368]\n\t" + "# A[16] * B[31]\n\t" + "ldr x7, [%[a], 128]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[17] * B[30]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh 
x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[18] * B[29]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[19] * B[28]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[20] * B[27]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[26]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[25]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[24]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[23]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[22]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[21]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[20]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh 
x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[19]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[18]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[17]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[16]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 376]\n\t" + "# A[17] * B[31]\n\t" + "ldr x7, [%[a], 136]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[18] * B[30]\n\t" + "ldr x7, [%[a], 144]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[19] * B[29]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[20] * B[28]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[21] * B[27]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[26]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 208]\n\t" + 
"mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[23] * B[25]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[24]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[23]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[22]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[21]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[20]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[19]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[18]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[17]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 384]\n\t" + "# A[18] * B[31]\n\t" + "ldr x7, [%[a], 144]\n\t" + 
"ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[19] * B[30]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[20] * B[29]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[21] * B[28]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[22] * B[27]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[26]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[25]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[24]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[23]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[22]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[21]\n\t" + "ldr x7, [%[a], 224]\n\t" 
+ "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[20]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[19]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[18]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 392]\n\t" + "# A[19] * B[31]\n\t" + "ldr x7, [%[a], 152]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[20] * B[30]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[21] * B[29]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[22] * B[28]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[23] * B[27]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[26]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * 
B[25]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[24]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[23]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[22]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[21]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[20]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[19]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 400]\n\t" + "# A[20] * B[31]\n\t" + "ldr x7, [%[a], 160]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[21] * B[30]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[22] * B[29]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc 
x5, x5, xzr\n\t" + "# A[23] * B[28]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[24] * B[27]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[26]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[25]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[24]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[23]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[22]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[21]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[20]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 408]\n\t" + "# A[21] * B[31]\n\t" + "ldr x7, [%[a], 168]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + 
"adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[22] * B[30]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[23] * B[29]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[24] * B[28]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[25] * B[27]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[26]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[25]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[24]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[23]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[22]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[21]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" 
+ "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 416]\n\t" + "# A[22] * B[31]\n\t" + "ldr x7, [%[a], 176]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[23] * B[30]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[24] * B[29]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[25] * B[28]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[26] * B[27]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[26]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[25]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[24]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[23]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[22]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, 
x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 424]\n\t" + "# A[23] * B[31]\n\t" + "ldr x7, [%[a], 184]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[24] * B[30]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[25] * B[29]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[26] * B[28]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[27] * B[27]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[26]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[25]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[24]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[23]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 432]\n\t" + "# A[24] * B[31]\n\t" + "ldr x7, [%[a], 192]\n\t" + "ldr x8, 
[%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[25] * B[30]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[26] * B[29]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[27] * B[28]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[28] * B[27]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[26]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[25]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[24]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 440]\n\t" + "# A[25] * B[31]\n\t" + "ldr x7, [%[a], 200]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[26] * B[30]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[27] * B[29]\n\t" + "ldr 
x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[28] * B[28]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[29] * B[27]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[26]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[25]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 448]\n\t" + "# A[26] * B[31]\n\t" + "ldr x7, [%[a], 208]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[27] * B[30]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[28] * B[29]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[29] * B[28]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[30] * B[27]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + 
"# A[31] * B[26]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 456]\n\t" + "# A[27] * B[31]\n\t" + "ldr x7, [%[a], 216]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[28] * B[30]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * B[29]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[30] * B[28]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "# A[31] * B[27]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 464]\n\t" + "# A[28] * B[31]\n\t" + "ldr x7, [%[a], 224]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "# A[29] * B[30]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * B[29]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "# A[31] * B[28]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, 
x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, x4, xzr\n\t" + "str x5, [%[r], 472]\n\t" + "# A[29] * B[31]\n\t" + "ldr x7, [%[a], 232]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "# A[30] * B[30]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "# A[31] * B[29]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, x5, xzr\n\t" + "str x3, [%[r], 480]\n\t" + "# A[30] * B[31]\n\t" + "ldr x7, [%[a], 240]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "# A[31] * B[30]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 488]\n\t" + "# A[31] * B[31]\n\t" + "ldr x7, [%[a], 248]\n\t" + "ldr x8, [%[b], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adc x3, x3, x7\n\t" + "stp x5, x3, [%[r], 496]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */
+static void sp_4096_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+    int pos = 0;
+
+#ifdef WOLFSSL_SP_SMALL
+    /* WOLFSSL_SP_SMALL variant: one digit per pass over all 32 digits. */
+    while (pos < 32) {
+        r[pos] = a[pos] & m;
+        pos++;
+    }
+#else
+    /* Default variant: same 32 digits, in the same ascending order, but
+     * eight digits are handled per pass. */
+    while (pos < 32) {
+        sp_digit* out = r + pos;
+        const sp_digit* in = a + pos;
+
+        out[0] = in[0] & m;
+        out[1] = in[1] & m;
+        out[2] = in[2] & m;
+        out[3] = in[3] & m;
+        out[4] = in[4] & m;
+        out[5] = in[5] & m;
+        out[6] = in[6] & m;
+        out[7] = in[7] & m;
+        pos += 8;
+    }
+#endif
+}
+
+/* Multiply a by b and store the product in r. (r = a * b)
+ *
+ * The work is split into three 32-digit multiplications (sum of halves,
+ * high halves, low halves) whose results are recombined with subtractions
+ * and offset additions - one Karatsuba step, assuming the sp_2048_*_32 and
+ * sp_4096_*_64 helpers do what their names suggest (they are not defined
+ * in this part of the file).
+ *
+ * NOTE(review): the 32-digit masking step calls sp_2048_mask_32 although
+ * sp_4096_mask_32 is defined directly above - presumably deliberate reuse
+ * of the 2048-bit helpers; confirm.
+ *
+ * r  Result. Must have room for 128 digits: r[96] is assigned and
+ *    r[97] to r[127] are cleared explicitly below.
+ * a  First operand; the halves starting at a[0] and a[32] are used.
+ * b  Second operand; the halves starting at b[0] and b[32] are used.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit a_sum[32];
+    sp_digit b_sum[32];
+    sp_digit mid[64];
+    sp_digit hi[64];
+    sp_digit* lo = r;
+    const sp_digit* a_hi = a + 32;
+    const sp_digit* b_hi = b + 32;
+    sp_digit carry_a;
+    sp_digit carry_b;
+    sp_digit top;
+
+    /* Add the two halves of each operand. The returned values are used
+     * below as 0/all-ones mask seeds (0 - x), so presumably they are the
+     * carry out of each addition (0 or 1). */
+    carry_a = sp_2048_add_32(a_sum, a, a_hi);
+    carry_b = sp_2048_add_32(b_sum, b, b_hi);
+    top = carry_a & carry_b;
+
+    /* Three half-size products. The low one is written straight into r,
+     * because lo is just another name for r. */
+    sp_2048_mul_32(mid, a_sum, b_sum);
+    sp_2048_mul_32(hi, a_hi, b_hi);
+    sp_2048_mul_32(lo, a, b);
+
+    /* r[64..] receives a_sum masked by (0 - carry_b); b_sum is masked in
+     * place by (0 - carry_a); the two are then added. Presumably this
+     * restores the cross terms lost when the sums overflowed 32 digits. */
+    sp_2048_mask_32(r + 64, a_sum, 0 - carry_b);
+    sp_2048_mask_32(b_sum, b_sum, 0 - carry_a);
+    top += sp_2048_add_32(r + 64, r + 64, b_sum);
+
+    /* mid -= hi; mid -= lo; then fold mid into r starting at digit 32.
+     * Every return value is accumulated into top. */
+    top += sp_4096_sub_in_place_64(mid, hi);
+    top += sp_4096_sub_in_place_64(mid, lo);
+    top += sp_4096_add_64(r + 32, r + 32, mid);
+
+    /* Digit 96 takes the accumulated value; digits 97..127 are zeroed. */
+    r[96] = top;
+    XMEMSET(r + 97, 0, sizeof(sp_digit) * 31);
+
+    /* Add the high product at digit 64; its return value is discarded. */
+    (void)sp_4096_add_64(r + 64, r + 64, hi);
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ */ +static void sp_4096_sqr_32(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[32]; + + __asm__ __volatile__ ( + "# A[0] * A[0]\n\t" + "ldr x9, [%[a], 0]\n\t" + "mul x8, x9, x9\n\t" + "umulh x3, x9, x9\n\t" + "mov x4, 0\n\t" + "str x8, [%[tmp]]\n\t" + "# A[0] * A[1]\n\t" + "ldr x9, [%[a], 8]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[tmp], 8]\n\t" + "# A[0] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "# A[1] * A[1]\n\t" + "ldr x9, [%[a], 8]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[tmp], 16]\n\t" + "# A[0] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "# A[1] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "str x2, [%[tmp], 24]\n\t" + "# A[0] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[1] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, 
x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[2] * A[2]\n\t" + "ldr x9, [%[a], 16]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[tmp], 32]\n\t" + "# A[0] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 40]\n\t" + "# A[0] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[3]\n\t" + "ldr x9, [%[a], 24]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 48]\n\t" + "# A[0] * A[7]\n\t" + "ldr x9, [%[a], 
56]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 56]\n\t" + "# A[0] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[4]\n\t" + "ldr x9, [%[a], 32]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 64]\n\t" + "# A[0] * 
A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 72]\n\t" + "# A[0] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[5]\n\t" + "ldr x9, [%[a], 40]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 80]\n\t" + "# A[0] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 88]\n\t" + "# A[0] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul 
x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[6]\n\t" + "ldr x9, [%[a], 48]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 96]\n\t" + "# A[0] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc 
x7, x7, xzr\n\t" + "# A[4] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 104]\n\t" + "# A[0] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + 
"ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[7]\n\t" + "ldr x9, [%[a], 56]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 112]\n\t" + "# A[0] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, 
x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 120]\n\t" + "# A[0] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[8]\n\t" + "ldr x9, [%[a], 64]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, 
x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 128]\n\t" + "# A[0] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + 
"adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 136]\n\t" + "# A[0] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[9]\n\t" + "ldr x9, [%[a], 72]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, 
x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 144]\n\t" + "# A[0] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[10]\n\t" + "ldr x9, [%[a], 
80]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 152]\n\t" + "# A[0] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, 
x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[10]\n\t" + "ldr x9, [%[a], 80]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 160]\n\t" + "# A[0] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + 
"adc x7, x7, xzr\n\t" + "# A[7] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 168]\n\t" + "# A[0] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[17]\n\t" + "ldr x9, 
[%[a], 136]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[11]\n\t" + "ldr x9, [%[a], 88]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 176]\n\t" + "# A[0] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, 
x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 184]\n\t" + "# A[0] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs 
x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[12]\n\t" + "ldr x9, [%[a], 96]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 192]\n\t" + "# A[0] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[19]\n\t" + "ldr x9, [%[a], 
152]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 200]\n\t" + "# A[0] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, 
x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 96]\n\t" + 
"mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[13]\n\t" + "ldr x9, [%[a], 104]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 208]\n\t" + "# A[0] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, 
x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 216]\n\t" + "# A[0] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * 
A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, 
xzr\n\t" + "# A[13] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[14]\n\t" + "ldr x9, [%[a], 112]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 224]\n\t" + "# A[0] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 
56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[tmp], 232]\n\t" + "# A[0] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + 
"umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[15]\n\t" + "ldr x9, [%[a], 120]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[tmp], 240]\n\t" + "# A[0] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 0]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[1] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[2] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc 
x7, x7, xzr\n\t" + "# A[5] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, 
x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[tmp], 248]\n\t" + "# A[1] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 8]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[2] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[3] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * 
A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[16]\n\t" + "ldr x9, [%[a], 128]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 256]\n\t" + "# A[2] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 16]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[3] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul 
x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[4] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 
104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 264]\n\t" + "# A[3] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 24]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[4] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[5] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, 
x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[17]\n\t" + "ldr x9, [%[a], 136]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc 
x2, x2, x7\n\t" + "str x3, [%[r], 272]\n\t" + "# A[4] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 32]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[5] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[6] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" 
+ "adc x7, x7, xzr\n\t" + "# A[14] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 280]\n\t" + "# A[5] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 40]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[6] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 48]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[7] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[26]\n\t" 
+ "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[18]\n\t" + "ldr x9, [%[a], 144]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 288]\n\t" + "# A[6] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr 
x10, [%[a], 48]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[7] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[8] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 
128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 296]\n\t" + "# A[7] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 56]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[8] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[9] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, 
x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[19]\n\t" + "ldr x9, [%[a], 152]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 304]\n\t" + "# A[8] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 64]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[9] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[10] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, 
x7, xzr\n\t" + "# A[11] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 
312]\n\t" + "# A[9] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 72]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[10] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[11] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" 
+ "# A[19] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[20]\n\t" + "ldr x9, [%[a], 160]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 320]\n\t" + "# A[10] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 80]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[11] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[12] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 
136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 328]\n\t" + "# A[11] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 88]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[12] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[13] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + 
"umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[21]\n\t" + "ldr x9, [%[a], 168]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 336]\n\t" + "# A[12] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 96]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[13] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[14] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, 
x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 344]\n\t" + "# A[13] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 104]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[14] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[15] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# 
A[16] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[22]\n\t" + "ldr x9, [%[a], 176]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 352]\n\t" + "# A[14] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 112]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[15] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[16] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 
128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 360]\n\t" + "# A[15] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 120]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[16] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[17] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + 
"umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[23]\n\t" + "ldr x9, [%[a], 184]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 368]\n\t" + "# A[16] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 128]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[17] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[18] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, 
x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 376]\n\t" + "# A[17] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 136]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[18] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[19] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# 
A[21] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[24]\n\t" + "ldr x9, [%[a], 192]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 384]\n\t" + "# A[18] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 144]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[19] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[20] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 
184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 392]\n\t" + "# A[19] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 152]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[20] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[21] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[25] * A[25]\n\t" + "ldr x9, [%[a], 200]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, 
x7\n\t" + "str x4, [%[r], 400]\n\t" + "# A[20] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 160]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[21] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[22] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[25] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "ldr x10, [%[a], 200]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 408]\n\t" + "# A[21] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 168]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[22] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[23] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[28]\n\t" + "ldr x9, 
[%[a], 224]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[25] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 200]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[26] * A[26]\n\t" + "ldr x9, [%[a], 208]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 416]\n\t" + "# A[22] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 176]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[23] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[24] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[25] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 200]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[26] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "ldr x10, [%[a], 208]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 424]\n\t" + "# A[23] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 184]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov 
x4, 0\n\t" + "mov x7, 0\n\t" + "# A[24] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[25] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 200]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[26] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 208]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[27] * A[27]\n\t" + "ldr x9, [%[a], 216]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 432]\n\t" + "# A[24] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 192]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x2, 0\n\t" + "mov x7, 0\n\t" + "# A[25] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 200]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[26] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 208]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[27] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "ldr x10, [%[a], 216]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x3, x3, x5\n\t" + "adcs x4, x4, x6\n\t" + "adc x2, x2, x7\n\t" + "str x3, [%[r], 440]\n\t" + "# A[25] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 
200]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x3, 0\n\t" + "mov x7, 0\n\t" + "# A[26] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 208]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[27] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 216]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[28] * A[28]\n\t" + "ldr x9, [%[a], 224]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x4, x4, x5\n\t" + "adcs x2, x2, x6\n\t" + "adc x3, x3, x7\n\t" + "str x4, [%[r], 448]\n\t" + "# A[26] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 208]\n\t" + "mul x5, x9, x10\n\t" + "umulh x6, x9, x10\n\t" + "mov x4, 0\n\t" + "mov x7, 0\n\t" + "# A[27] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 216]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "# A[28] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "ldr x10, [%[a], 224]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x5, x5, x8\n\t" + "adcs x6, x6, x9\n\t" + "adc x7, x7, xzr\n\t" + "adds x5, x5, x5\n\t" + "adcs x6, x6, x6\n\t" + "adc x7, x7, x7\n\t" + "adds x2, x2, x5\n\t" + "adcs x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x2, [%[r], 456]\n\t" + "# A[27] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 216]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[28] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 224]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + 
"adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "# A[29] * A[29]\n\t" + "ldr x9, [%[a], 232]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[r], 464]\n\t" + "# A[28] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 224]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, xzr, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "# A[29] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "ldr x10, [%[a], 232]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "adds x4, x4, x8\n\t" + "adcs x2, x2, x9\n\t" + "adc x3, x3, xzr\n\t" + "str x4, [%[r], 472]\n\t" + "# A[29] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 232]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, xzr, xzr\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "# A[30] * A[30]\n\t" + "ldr x9, [%[a], 240]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x2, x2, x8\n\t" + "adcs x3, x3, x9\n\t" + "adc x4, x4, xzr\n\t" + "str x2, [%[r], 480]\n\t" + "# A[30] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "ldr x10, [%[a], 240]\n\t" + "mul x8, x9, x10\n\t" + "umulh x9, x9, x10\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, xzr, xzr\n\t" + "adds x3, x3, x8\n\t" + "adcs x4, x4, x9\n\t" + "adc x2, x2, xzr\n\t" + "str x3, [%[r], 488]\n\t" + "# A[31] * A[31]\n\t" + "ldr x9, [%[a], 248]\n\t" + "mul x8, x9, x9\n\t" + "umulh x9, x9, x9\n\t" + "adds x4, x4, x8\n\t" + "adc x2, x2, x9\n\t" + "stp x4, x2, [%[r], 496]\n\t" + : + : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) + : "memory", "x2", "x3", "x4", "x8", "x9", "x10", "x5", "x6", "x7" + 
); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); + sp_2048_sqr_32(z1, a1); + sp_2048_sqr_32(z2, &a[32]); + sp_2048_sqr_32(z0, a); + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); + u += sp_4096_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + (void)sp_4096_add_64(r + 64, r + 64, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x11, %[a], 512\n\t" + "\n1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldp x3, x4, [%[a]], #16\n\t" + "ldp x5, x6, [%[a]], #16\n\t" + "ldp x7, x8, [%[b]], #16\n\t" + "ldp x9, x10, [%[b]], #16\n\t" + "adcs x3, x3, x7\n\t" + "adcs x4, x4, x8\n\t" + "adcs x5, x5, x9\n\t" + "adcs x6, x6, x10\n\t" + "stp x3, x4, [%[r]], #16\n\t" + "stp x5, x6, [%[r]], #16\n\t" + "cset %[c], cs\n\t" + "cmp %[a], x11\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "add x10, %[a], 512\n\t" + "\n1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldp x2, x3, [%[a]]\n\t" + "ldp x4, x5, [%[a], #16]\n\t" + "ldp x6, x7, [%[b]], #16\n\t" + "ldp x8, x9, [%[b]], #16\n\t" + "sbcs x2, x2, x6\n\t" + "sbcs x3, x3, x7\n\t" + "sbcs x4, x4, x8\n\t" + "sbcs x5, x5, x9\n\t" + "stp x2, x3, [%[a]], #16\n\t" + "stp x4, x5, [%[a]], #16\n\t" + "csetm %[c], cc\n\t" + "cmp %[a], x10\n\t" + "b.ne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) +{ + sp_digit tmp[128]; + + __asm__ __volatile__ ( + "mov x5, 0\n\t" + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 504\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[b], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 512\n\t" + "b.eq 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 1008\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit tmp[128]; + + __asm__ __volatile__ ( + "mov x6, 0\n\t" + "mov x7, 0\n\t" + "mov x8, 0\n\t" + "mov x5, 0\n\t" + "\n1:\n\t" + "subs x3, x5, 504\n\t" + "csel x3, xzr, x3, cc\n\t" + "sub x4, x5, x3\n\t" + "\n2:\n\t" + "cmp x4, x3\n\t" + "b.eq 4f\n\t" + "ldr x10, [%[a], x3]\n\t" + "ldr x11, [%[a], x4]\n\t" + "mul x9, x10, x11\n\t" + "umulh x10, x10, x11\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "b.al 5f\n\t" + "\n4:\n\t" + "ldr x10, [%[a], x3]\n\t" + "mul x9, x10, x10\n\t" + "umulh x10, x10, x10\n\t" + "adds x6, x6, x9\n\t" + "adcs x7, x7, x10\n\t" + "adc x8, x8, xzr\n\t" + "\n5:\n\t" + "add x3, x3, #8\n\t" + "sub x4, x4, #8\n\t" + "cmp x3, 512\n\t" + "b.eq 3f\n\t" + "cmp x3, x4\n\t" + "b.gt 3f\n\t" + "cmp x3, x5\n\t" + "b.le 2b\n\t" + "\n3:\n\t" + "str x6, [%[r], x5]\n\t" + "mov x6, x7\n\t" + "mov x7, x8\n\t" + "mov x8, #0\n\t" + "add x5, x5, #8\n\t" + "cmp x5, 1008\n\t" + "b.le 1b\n\t" + "str x6, [%[r], x5]\n\t" + : + : [r] "r" (tmp), [a] "r" (a) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +static void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 512\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "str x3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr x8, [%[a], 8]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 8]\n\t" + "# A[2] * B\n\t" + "ldr x8, [%[a], 16]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 16]\n\t" + "# A[3] * B\n\t" + "ldr x8, [%[a], 24]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 24]\n\t" + "# A[4] * B\n\t" + "ldr x8, [%[a], 32]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 32]\n\t" + "# A[5] * B\n\t" + "ldr x8, [%[a], 40]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 40]\n\t" + "# A[6] * 
B\n\t" + "ldr x8, [%[a], 48]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 48]\n\t" + "# A[7] * B\n\t" + "ldr x8, [%[a], 56]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 56]\n\t" + "# A[8] * B\n\t" + "ldr x8, [%[a], 64]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 64]\n\t" + "# A[9] * B\n\t" + "ldr x8, [%[a], 72]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 72]\n\t" + "# A[10] * B\n\t" + "ldr x8, [%[a], 80]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 80]\n\t" + "# A[11] * B\n\t" + "ldr x8, [%[a], 88]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 88]\n\t" + "# A[12] * B\n\t" + "ldr x8, [%[a], 96]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 96]\n\t" + "# A[13] * B\n\t" + "ldr x8, [%[a], 104]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 104]\n\t" + "# A[14] * B\n\t" + "ldr x8, [%[a], 112]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 112]\n\t" + "# A[15] * B\n\t" + "ldr x8, [%[a], 120]\n\t" + "mov x5, 
0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 120]\n\t" + "# A[16] * B\n\t" + "ldr x8, [%[a], 128]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 128]\n\t" + "# A[17] * B\n\t" + "ldr x8, [%[a], 136]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 136]\n\t" + "# A[18] * B\n\t" + "ldr x8, [%[a], 144]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 144]\n\t" + "# A[19] * B\n\t" + "ldr x8, [%[a], 152]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 152]\n\t" + "# A[20] * B\n\t" + "ldr x8, [%[a], 160]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 160]\n\t" + "# A[21] * B\n\t" + "ldr x8, [%[a], 168]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 168]\n\t" + "# A[22] * B\n\t" + "ldr x8, [%[a], 176]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 176]\n\t" + "# A[23] * B\n\t" + "ldr x8, [%[a], 184]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 184]\n\t" + "# A[24] * B\n\t" + "ldr x8, [%[a], 192]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], 
x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 192]\n\t" + "# A[25] * B\n\t" + "ldr x8, [%[a], 200]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 200]\n\t" + "# A[26] * B\n\t" + "ldr x8, [%[a], 208]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 208]\n\t" + "# A[27] * B\n\t" + "ldr x8, [%[a], 216]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 216]\n\t" + "# A[28] * B\n\t" + "ldr x8, [%[a], 224]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 224]\n\t" + "# A[29] * B\n\t" + "ldr x8, [%[a], 232]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 232]\n\t" + "# A[30] * B\n\t" + "ldr x8, [%[a], 240]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 240]\n\t" + "# A[31] * B\n\t" + "ldr x8, [%[a], 248]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 248]\n\t" + "# A[32] * B\n\t" + "ldr x8, [%[a], 256]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 256]\n\t" + "# A[33] * B\n\t" + "ldr x8, [%[a], 264]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], 
x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 264]\n\t" + "# A[34] * B\n\t" + "ldr x8, [%[a], 272]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 272]\n\t" + "# A[35] * B\n\t" + "ldr x8, [%[a], 280]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 280]\n\t" + "# A[36] * B\n\t" + "ldr x8, [%[a], 288]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 288]\n\t" + "# A[37] * B\n\t" + "ldr x8, [%[a], 296]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 296]\n\t" + "# A[38] * B\n\t" + "ldr x8, [%[a], 304]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 304]\n\t" + "# A[39] * B\n\t" + "ldr x8, [%[a], 312]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 312]\n\t" + "# A[40] * B\n\t" + "ldr x8, [%[a], 320]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 320]\n\t" + "# A[41] * B\n\t" + "ldr x8, [%[a], 328]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 328]\n\t" + "# A[42] * B\n\t" + "ldr x8, [%[a], 336]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, 
x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 336]\n\t" + "# A[43] * B\n\t" + "ldr x8, [%[a], 344]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 344]\n\t" + "# A[44] * B\n\t" + "ldr x8, [%[a], 352]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 352]\n\t" + "# A[45] * B\n\t" + "ldr x8, [%[a], 360]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 360]\n\t" + "# A[46] * B\n\t" + "ldr x8, [%[a], 368]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 368]\n\t" + "# A[47] * B\n\t" + "ldr x8, [%[a], 376]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 376]\n\t" + "# A[48] * B\n\t" + "ldr x8, [%[a], 384]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 384]\n\t" + "# A[49] * B\n\t" + "ldr x8, [%[a], 392]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 392]\n\t" + "# A[50] * B\n\t" + "ldr x8, [%[a], 400]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 400]\n\t" + "# A[51] * B\n\t" + "ldr x8, [%[a], 408]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, 
x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 408]\n\t" + "# A[52] * B\n\t" + "ldr x8, [%[a], 416]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 416]\n\t" + "# A[53] * B\n\t" + "ldr x8, [%[a], 424]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 424]\n\t" + "# A[54] * B\n\t" + "ldr x8, [%[a], 432]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 432]\n\t" + "# A[55] * B\n\t" + "ldr x8, [%[a], 440]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 440]\n\t" + "# A[56] * B\n\t" + "ldr x8, [%[a], 448]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 448]\n\t" + "# A[57] * B\n\t" + "ldr x8, [%[a], 456]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 456]\n\t" + "# A[58] * B\n\t" + "ldr x8, [%[a], 464]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 464]\n\t" + "# A[59] * B\n\t" + "ldr x8, [%[a], 472]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 472]\n\t" + "# A[60] * B\n\t" + "ldr x8, [%[a], 480]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, 
xzr\n\t" + "str x3, [%[r], 480]\n\t" + "# A[61] * B\n\t" + "ldr x8, [%[a], 488]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 488]\n\t" + "# A[62] * B\n\t" + "ldr x8, [%[a], 496]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 496]\n\t" + "# A[63] * B\n\t" + "ldr x8, [%[a], 504]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adc x4, x4, x7\n\t" + "str x3, [%[r], 504]\n\t" + "str x4, [%[r], 512]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8" + ); +#endif +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A signle precision number. + */ +static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_64(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "subs %[c], xzr, %[c]\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "csetm %[c], cc\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 512\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + + "ldr x4, [%[a], 0]\n\t" + "ldr x6, [%[a], 8]\n\t" + "ldr x5, [%[b], 0]\n\t" + "ldr x7, [%[b], 8]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "subs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 0]\n\t" + "str x6, [%[r], 8]\n\t" + "ldr x4, [%[a], 16]\n\t" + "ldr x6, [%[a], 24]\n\t" + "ldr x5, [%[b], 16]\n\t" + "ldr x7, [%[b], 24]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 16]\n\t" + "str x6, [%[r], 24]\n\t" + "ldr x4, [%[a], 32]\n\t" + "ldr x6, [%[a], 40]\n\t" + "ldr x5, [%[b], 32]\n\t" + "ldr x7, [%[b], 40]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 32]\n\t" + "str x6, [%[r], 40]\n\t" + "ldr x4, [%[a], 48]\n\t" + "ldr x6, [%[a], 56]\n\t" + "ldr x5, [%[b], 48]\n\t" + "ldr x7, [%[b], 56]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 48]\n\t" + "str x6, [%[r], 56]\n\t" + "ldr x4, [%[a], 64]\n\t" + "ldr x6, [%[a], 72]\n\t" + "ldr x5, [%[b], 64]\n\t" + "ldr x7, [%[b], 72]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 64]\n\t" + "str x6, [%[r], 72]\n\t" + "ldr x4, [%[a], 80]\n\t" + "ldr x6, [%[a], 88]\n\t" + "ldr x5, [%[b], 80]\n\t" + "ldr x7, [%[b], 
88]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 80]\n\t" + "str x6, [%[r], 88]\n\t" + "ldr x4, [%[a], 96]\n\t" + "ldr x6, [%[a], 104]\n\t" + "ldr x5, [%[b], 96]\n\t" + "ldr x7, [%[b], 104]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 96]\n\t" + "str x6, [%[r], 104]\n\t" + "ldr x4, [%[a], 112]\n\t" + "ldr x6, [%[a], 120]\n\t" + "ldr x5, [%[b], 112]\n\t" + "ldr x7, [%[b], 120]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 112]\n\t" + "str x6, [%[r], 120]\n\t" + "ldr x4, [%[a], 128]\n\t" + "ldr x6, [%[a], 136]\n\t" + "ldr x5, [%[b], 128]\n\t" + "ldr x7, [%[b], 136]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 128]\n\t" + "str x6, [%[r], 136]\n\t" + "ldr x4, [%[a], 144]\n\t" + "ldr x6, [%[a], 152]\n\t" + "ldr x5, [%[b], 144]\n\t" + "ldr x7, [%[b], 152]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 144]\n\t" + "str x6, [%[r], 152]\n\t" + "ldr x4, [%[a], 160]\n\t" + "ldr x6, [%[a], 168]\n\t" + "ldr x5, [%[b], 160]\n\t" + "ldr x7, [%[b], 168]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 160]\n\t" + "str x6, [%[r], 168]\n\t" + "ldr x4, [%[a], 176]\n\t" + "ldr x6, [%[a], 184]\n\t" + "ldr x5, [%[b], 176]\n\t" + "ldr x7, [%[b], 184]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 176]\n\t" + "str x6, [%[r], 184]\n\t" + "ldr x4, [%[a], 192]\n\t" + "ldr x6, [%[a], 200]\n\t" + "ldr x5, [%[b], 192]\n\t" + "ldr x7, [%[b], 200]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 192]\n\t" + "str 
x6, [%[r], 200]\n\t" + "ldr x4, [%[a], 208]\n\t" + "ldr x6, [%[a], 216]\n\t" + "ldr x5, [%[b], 208]\n\t" + "ldr x7, [%[b], 216]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 208]\n\t" + "str x6, [%[r], 216]\n\t" + "ldr x4, [%[a], 224]\n\t" + "ldr x6, [%[a], 232]\n\t" + "ldr x5, [%[b], 224]\n\t" + "ldr x7, [%[b], 232]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 224]\n\t" + "str x6, [%[r], 232]\n\t" + "ldr x4, [%[a], 240]\n\t" + "ldr x6, [%[a], 248]\n\t" + "ldr x5, [%[b], 240]\n\t" + "ldr x7, [%[b], 248]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 240]\n\t" + "str x6, [%[r], 248]\n\t" + "ldr x4, [%[a], 256]\n\t" + "ldr x6, [%[a], 264]\n\t" + "ldr x5, [%[b], 256]\n\t" + "ldr x7, [%[b], 264]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 256]\n\t" + "str x6, [%[r], 264]\n\t" + "ldr x4, [%[a], 272]\n\t" + "ldr x6, [%[a], 280]\n\t" + "ldr x5, [%[b], 272]\n\t" + "ldr x7, [%[b], 280]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 272]\n\t" + "str x6, [%[r], 280]\n\t" + "ldr x4, [%[a], 288]\n\t" + "ldr x6, [%[a], 296]\n\t" + "ldr x5, [%[b], 288]\n\t" + "ldr x7, [%[b], 296]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 288]\n\t" + "str x6, [%[r], 296]\n\t" + "ldr x4, [%[a], 304]\n\t" + "ldr x6, [%[a], 312]\n\t" + "ldr x5, [%[b], 304]\n\t" + "ldr x7, [%[b], 312]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 304]\n\t" + "str x6, [%[r], 312]\n\t" + "ldr x4, [%[a], 320]\n\t" + "ldr x6, [%[a], 328]\n\t" + "ldr x5, [%[b], 320]\n\t" + "ldr x7, [%[b], 328]\n\t" + 
"and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 320]\n\t" + "str x6, [%[r], 328]\n\t" + "ldr x4, [%[a], 336]\n\t" + "ldr x6, [%[a], 344]\n\t" + "ldr x5, [%[b], 336]\n\t" + "ldr x7, [%[b], 344]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 336]\n\t" + "str x6, [%[r], 344]\n\t" + "ldr x4, [%[a], 352]\n\t" + "ldr x6, [%[a], 360]\n\t" + "ldr x5, [%[b], 352]\n\t" + "ldr x7, [%[b], 360]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 352]\n\t" + "str x6, [%[r], 360]\n\t" + "ldr x4, [%[a], 368]\n\t" + "ldr x6, [%[a], 376]\n\t" + "ldr x5, [%[b], 368]\n\t" + "ldr x7, [%[b], 376]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 368]\n\t" + "str x6, [%[r], 376]\n\t" + "ldr x4, [%[a], 384]\n\t" + "ldr x6, [%[a], 392]\n\t" + "ldr x5, [%[b], 384]\n\t" + "ldr x7, [%[b], 392]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 384]\n\t" + "str x6, [%[r], 392]\n\t" + "ldr x4, [%[a], 400]\n\t" + "ldr x6, [%[a], 408]\n\t" + "ldr x5, [%[b], 400]\n\t" + "ldr x7, [%[b], 408]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 400]\n\t" + "str x6, [%[r], 408]\n\t" + "ldr x4, [%[a], 416]\n\t" + "ldr x6, [%[a], 424]\n\t" + "ldr x5, [%[b], 416]\n\t" + "ldr x7, [%[b], 424]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 416]\n\t" + "str x6, [%[r], 424]\n\t" + "ldr x4, [%[a], 432]\n\t" + "ldr x6, [%[a], 440]\n\t" + "ldr x5, [%[b], 432]\n\t" + "ldr x7, [%[b], 440]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 432]\n\t" + "str x6, 
[%[r], 440]\n\t" + "ldr x4, [%[a], 448]\n\t" + "ldr x6, [%[a], 456]\n\t" + "ldr x5, [%[b], 448]\n\t" + "ldr x7, [%[b], 456]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 448]\n\t" + "str x6, [%[r], 456]\n\t" + "ldr x4, [%[a], 464]\n\t" + "ldr x6, [%[a], 472]\n\t" + "ldr x5, [%[b], 464]\n\t" + "ldr x7, [%[b], 472]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 464]\n\t" + "str x6, [%[r], 472]\n\t" + "ldr x4, [%[a], 480]\n\t" + "ldr x6, [%[a], 488]\n\t" + "ldr x5, [%[b], 480]\n\t" + "ldr x7, [%[b], 488]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 480]\n\t" + "str x6, [%[r], 488]\n\t" + "ldr x4, [%[a], 496]\n\t" + "ldr x6, [%[a], 504]\n\t" + "ldr x5, [%[b], 496]\n\t" + "ldr x7, [%[b], 504]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "sbcs x4, x4, x5\n\t" + "sbcs x6, x6, x7\n\t" + "str x4, [%[r], 496]\n\t" + "str x6, [%[r], 504]\n\t" + "csetm %[c], cc\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "ldp x12, x13, [%[m], 0]\n\t" + "ldp x14, x15, [%[m], 16]\n\t" + "ldp x16, x17, [%[m], 32]\n\t" + "ldp x19, x20, [%[m], 48]\n\t" + "ldp x21, x22, [%[m], 64]\n\t" + "ldp x23, x24, [%[m], 80]\n\t" + "ldp x25, x26, [%[m], 96]\n\t" + "ldp x27, x28, [%[m], 112]\n\t" + "# i = 0\n\t" + "mov x3, 0\n\t" + "ldp x10, x11, [%[a], 0]\n\t" + "\n1:\n\t" + "# mu = a[i] * mp\n\t" + "mul x8, %[mp], x10\n\t" + "# a[i+0] += m[0] * mu\n\t" + "ldr x9, [%[a], 0]\n\t" + "mul x6, x12, x8\n\t" + "umulh x7, x12, x8\n\t" + "adds x10, x10, x6\n\t" + "adc x5, x7, xzr\n\t" + "# a[i+1] += m[1] * mu\n\t" + "ldr x9, [%[a], 8]\n\t" + "mul x6, x13, x8\n\t" + "umulh x7, x13, x8\n\t" + "adds x10, x11, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x10, x10, x5\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+2] += m[2] * mu\n\t" + "ldr x11, [%[a], 16]\n\t" + "mul x6, x14, x8\n\t" + "umulh x7, x14, x8\n\t" + "adds x11, x11, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x11, x11, x4\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+3] += m[3] * mu\n\t" + "ldr x9, [%[a], 24]\n\t" + "mul x6, x15, x8\n\t" + "umulh x7, x15, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 24]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+4] += m[4] * mu\n\t" + "ldr x9, [%[a], 32]\n\t" + "mul x6, x16, x8\n\t" + "umulh x7, x16, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 32]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+5] += m[5] * mu\n\t" + "ldr x9, [%[a], 40]\n\t" + "mul x6, x17, x8\n\t" + "umulh x7, x17, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 40]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+6] += m[6] * mu\n\t" + "ldr x9, [%[a], 48]\n\t" + "mul x6, x19, x8\n\t" + "umulh x7, x19, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, 
[%[a], 48]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+7] += m[7] * mu\n\t" + "ldr x9, [%[a], 56]\n\t" + "mul x6, x20, x8\n\t" + "umulh x7, x20, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 56]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+8] += m[8] * mu\n\t" + "ldr x9, [%[a], 64]\n\t" + "mul x6, x21, x8\n\t" + "umulh x7, x21, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 64]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+9] += m[9] * mu\n\t" + "ldr x9, [%[a], 72]\n\t" + "mul x6, x22, x8\n\t" + "umulh x7, x22, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 72]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+10] += m[10] * mu\n\t" + "ldr x9, [%[a], 80]\n\t" + "mul x6, x23, x8\n\t" + "umulh x7, x23, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 80]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+11] += m[11] * mu\n\t" + "ldr x9, [%[a], 88]\n\t" + "mul x6, x24, x8\n\t" + "umulh x7, x24, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 88]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+12] += m[12] * mu\n\t" + "ldr x9, [%[a], 96]\n\t" + "mul x6, x25, x8\n\t" + "umulh x7, x25, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 96]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+13] += m[13] * mu\n\t" + "ldr x9, [%[a], 104]\n\t" + "mul x6, x26, x8\n\t" + "umulh x7, x26, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 104]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+14] += m[14] * mu\n\t" + "ldr x9, [%[a], 112]\n\t" + "mul x6, x27, x8\n\t" + "umulh x7, x27, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 112]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+15] += m[15] * mu\n\t" + "ldr x9, [%[a], 120]\n\t" + "mul x6, x28, x8\n\t" + "umulh x7, x28, 
x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 120]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+16] += m[16] * mu\n\t" + "ldr x7, [%[m], 128]\n\t" + "ldr x9, [%[a], 128]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 128]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+17] += m[17] * mu\n\t" + "ldr x7, [%[m], 136]\n\t" + "ldr x9, [%[a], 136]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 136]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+18] += m[18] * mu\n\t" + "ldr x7, [%[m], 144]\n\t" + "ldr x9, [%[a], 144]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 144]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+19] += m[19] * mu\n\t" + "ldr x7, [%[m], 152]\n\t" + "ldr x9, [%[a], 152]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 152]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+20] += m[20] * mu\n\t" + "ldr x7, [%[m], 160]\n\t" + "ldr x9, [%[a], 160]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 160]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+21] += m[21] * mu\n\t" + "ldr x7, [%[m], 168]\n\t" + "ldr x9, [%[a], 168]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 168]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+22] += m[22] * mu\n\t" + "ldr x7, [%[m], 176]\n\t" + "ldr x9, [%[a], 176]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 176]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+23] += m[23] * mu\n\t" + "ldr x7, 
[%[m], 184]\n\t" + "ldr x9, [%[a], 184]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 184]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+24] += m[24] * mu\n\t" + "ldr x7, [%[m], 192]\n\t" + "ldr x9, [%[a], 192]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 192]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+25] += m[25] * mu\n\t" + "ldr x7, [%[m], 200]\n\t" + "ldr x9, [%[a], 200]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 200]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+26] += m[26] * mu\n\t" + "ldr x7, [%[m], 208]\n\t" + "ldr x9, [%[a], 208]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 208]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+27] += m[27] * mu\n\t" + "ldr x7, [%[m], 216]\n\t" + "ldr x9, [%[a], 216]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 216]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+28] += m[28] * mu\n\t" + "ldr x7, [%[m], 224]\n\t" + "ldr x9, [%[a], 224]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 224]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+29] += m[29] * mu\n\t" + "ldr x7, [%[m], 232]\n\t" + "ldr x9, [%[a], 232]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 232]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+30] += m[30] * mu\n\t" + "ldr x7, [%[m], 240]\n\t" + "ldr x9, [%[a], 240]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, 
[%[a], 240]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+31] += m[31] * mu\n\t" + "ldr x7, [%[m], 248]\n\t" + "ldr x9, [%[a], 248]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 248]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+32] += m[32] * mu\n\t" + "ldr x7, [%[m], 256]\n\t" + "ldr x9, [%[a], 256]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 256]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+33] += m[33] * mu\n\t" + "ldr x7, [%[m], 264]\n\t" + "ldr x9, [%[a], 264]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 264]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+34] += m[34] * mu\n\t" + "ldr x7, [%[m], 272]\n\t" + "ldr x9, [%[a], 272]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 272]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+35] += m[35] * mu\n\t" + "ldr x7, [%[m], 280]\n\t" + "ldr x9, [%[a], 280]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 280]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+36] += m[36] * mu\n\t" + "ldr x7, [%[m], 288]\n\t" + "ldr x9, [%[a], 288]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 288]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+37] += m[37] * mu\n\t" + "ldr x7, [%[m], 296]\n\t" + "ldr x9, [%[a], 296]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 296]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+38] += m[38] * mu\n\t" + "ldr x7, [%[m], 304]\n\t" + "ldr x9, [%[a], 304]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + 
"adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 304]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+39] += m[39] * mu\n\t" + "ldr x7, [%[m], 312]\n\t" + "ldr x9, [%[a], 312]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 312]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+40] += m[40] * mu\n\t" + "ldr x7, [%[m], 320]\n\t" + "ldr x9, [%[a], 320]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 320]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+41] += m[41] * mu\n\t" + "ldr x7, [%[m], 328]\n\t" + "ldr x9, [%[a], 328]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 328]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+42] += m[42] * mu\n\t" + "ldr x7, [%[m], 336]\n\t" + "ldr x9, [%[a], 336]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 336]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+43] += m[43] * mu\n\t" + "ldr x7, [%[m], 344]\n\t" + "ldr x9, [%[a], 344]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 344]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+44] += m[44] * mu\n\t" + "ldr x7, [%[m], 352]\n\t" + "ldr x9, [%[a], 352]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 352]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+45] += m[45] * mu\n\t" + "ldr x7, [%[m], 360]\n\t" + "ldr x9, [%[a], 360]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 360]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+46] += m[46] * mu\n\t" + "ldr x7, [%[m], 
368]\n\t" + "ldr x9, [%[a], 368]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 368]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+47] += m[47] * mu\n\t" + "ldr x7, [%[m], 376]\n\t" + "ldr x9, [%[a], 376]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 376]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+48] += m[48] * mu\n\t" + "ldr x7, [%[m], 384]\n\t" + "ldr x9, [%[a], 384]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 384]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+49] += m[49] * mu\n\t" + "ldr x7, [%[m], 392]\n\t" + "ldr x9, [%[a], 392]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 392]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+50] += m[50] * mu\n\t" + "ldr x7, [%[m], 400]\n\t" + "ldr x9, [%[a], 400]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 400]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+51] += m[51] * mu\n\t" + "ldr x7, [%[m], 408]\n\t" + "ldr x9, [%[a], 408]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 408]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+52] += m[52] * mu\n\t" + "ldr x7, [%[m], 416]\n\t" + "ldr x9, [%[a], 416]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 416]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+53] += m[53] * mu\n\t" + "ldr x7, [%[m], 424]\n\t" + "ldr x9, [%[a], 424]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 
424]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+54] += m[54] * mu\n\t" + "ldr x7, [%[m], 432]\n\t" + "ldr x9, [%[a], 432]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 432]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+55] += m[55] * mu\n\t" + "ldr x7, [%[m], 440]\n\t" + "ldr x9, [%[a], 440]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 440]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+56] += m[56] * mu\n\t" + "ldr x7, [%[m], 448]\n\t" + "ldr x9, [%[a], 448]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 448]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+57] += m[57] * mu\n\t" + "ldr x7, [%[m], 456]\n\t" + "ldr x9, [%[a], 456]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 456]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+58] += m[58] * mu\n\t" + "ldr x7, [%[m], 464]\n\t" + "ldr x9, [%[a], 464]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 464]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+59] += m[59] * mu\n\t" + "ldr x7, [%[m], 472]\n\t" + "ldr x9, [%[a], 472]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 472]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+60] += m[60] * mu\n\t" + "ldr x7, [%[m], 480]\n\t" + "ldr x9, [%[a], 480]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 480]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+61] += m[61] * mu\n\t" + "ldr x7, [%[m], 488]\n\t" + "ldr x9, [%[a], 488]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds 
x9, x9, x6\n\t" + "adc x4, x7, xzr\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 488]\n\t" + "adc x4, x4, xzr\n\t" + "# a[i+62] += m[62] * mu\n\t" + "ldr x7, [%[m], 496]\n\t" + "ldr x9, [%[a], 496]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x9, x9, x6\n\t" + "adc x5, x7, xzr\n\t" + "adds x9, x9, x4\n\t" + "str x9, [%[a], 496]\n\t" + "adc x5, x5, xzr\n\t" + "# a[i+63] += m[63] * mu\n\t" + "ldr x7, [%[m], 504]\n\t" + "ldr x9, [%[a], 504]\n\t" + "mul x6, x7, x8\n\t" + "umulh x7, x7, x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x7, x7, %[ca]\n\t" + "cset %[ca], cs\n\t" + "adds x9, x9, x5\n\t" + "str x9, [%[a], 504]\n\t" + "ldr x9, [%[a], 512]\n\t" + "adcs x9, x9, x7\n\t" + "str x9, [%[a], 512]\n\t" + "adc %[ca], %[ca], xzr\n\t" + "# i += 1\n\t" + "add %[a], %[a], 8\n\t" + "add x3, x3, 8\n\t" + "cmp x3, 512\n\t" + "blt 1b\n\t" + "str x10, [%[a], 0]\n\t" + "str x11, [%[a], 8]\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + + sp_4096_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_64(r, a, b); + sp_4096_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. 
+ */ +static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_64(r, a); + sp_4096_mont_reduce_64(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) +{ + sp_digit r; + + __asm__ __volatile__ ( + "lsr x5, %[div], 32\n\t" + "add x5, x5, 1\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x6, x3, 32\n\t" + "mul x4, %[div], x6\n\t" + "umulh x3, %[div], x6\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "udiv x3, %[d1], x5\n\t" + "lsl x3, x3, 32\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsl x3, %[d1], 32\n\t" + "orr x3, x3, %[d0], lsr 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "lsl x3, %[d1], 32\n\t" + "orr x3, x3, %[d0], lsr 32\n\t" + + "udiv x3, x3, x5\n\t" + "add x6, x6, x3\n\t" + "mul x4, %[div], x3\n\t" + "umulh x3, %[div], x3\n\t" + "subs %[d0], %[d0], x4\n\t" + "sbc %[d1], %[d1], x3\n\t" + + "udiv x3, %[d0], %[div]\n\t" + "add x6, x6, x3\n\t" + "mul x3, %[div], x3\n\t" + "sub %[d0], %[d0], x3\n\t" + "mov %[r], x6\n\t" + + : [r] "=r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "x3", "x4", "x5", "x6" + ); + + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = -1; + sp_digit one = 1; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x3, -1\n\t" + "mov x6, 504\n\t" + "1:\n\t" + "ldr x4, [%[a], x6]\n\t" + "ldr x5, [%[b], x6]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "subs x6, x6, #8\n\t" + "b.cs 1b\n\t" + "eor %[r], %[r], x3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "x2", "x3", "x4", "x5", "x6" + ); +#else + __asm__ __volatile__ ( + "mov x3, -1\n\t" + "ldr x4, [%[a], 504]\n\t" + "ldr x5, [%[b], 504]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 496]\n\t" + "ldr x5, [%[b], 496]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 488]\n\t" + "ldr x5, [%[b], 488]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 480]\n\t" + "ldr x5, [%[b], 480]\n\t" 
+ "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 472]\n\t" + "ldr x5, [%[b], 472]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 464]\n\t" + "ldr x5, [%[b], 464]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 456]\n\t" + "ldr x5, [%[b], 456]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 448]\n\t" + "ldr x5, [%[b], 448]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 440]\n\t" + "ldr x5, [%[b], 440]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 432]\n\t" + "ldr x5, [%[b], 432]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 424]\n\t" + "ldr x5, [%[b], 424]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 416]\n\t" + "ldr x5, [%[b], 416]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 408]\n\t" + "ldr x5, [%[b], 
408]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 400]\n\t" + "ldr x5, [%[b], 400]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 392]\n\t" + "ldr x5, [%[b], 392]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 384]\n\t" + "ldr x5, [%[b], 384]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 376]\n\t" + "ldr x5, [%[b], 376]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 368]\n\t" + "ldr x5, [%[b], 368]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 360]\n\t" + "ldr x5, [%[b], 360]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 352]\n\t" + "ldr x5, [%[b], 352]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 344]\n\t" + "ldr x5, [%[b], 344]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 336]\n\t" + "ldr x5, 
[%[b], 336]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 328]\n\t" + "ldr x5, [%[b], 328]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 320]\n\t" + "ldr x5, [%[b], 320]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 312]\n\t" + "ldr x5, [%[b], 312]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 304]\n\t" + "ldr x5, [%[b], 304]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 296]\n\t" + "ldr x5, [%[b], 296]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 288]\n\t" + "ldr x5, [%[b], 288]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 280]\n\t" + "ldr x5, [%[b], 280]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 272]\n\t" + "ldr x5, [%[b], 272]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 264]\n\t" + 
"ldr x5, [%[b], 264]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 256]\n\t" + "ldr x5, [%[b], 256]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 248]\n\t" + "ldr x5, [%[b], 248]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 240]\n\t" + "ldr x5, [%[b], 240]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 232]\n\t" + "ldr x5, [%[b], 232]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 224]\n\t" + "ldr x5, [%[b], 224]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 216]\n\t" + "ldr x5, [%[b], 216]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 208]\n\t" + "ldr x5, [%[b], 208]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 200]\n\t" + "ldr x5, [%[b], 200]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 
192]\n\t" + "ldr x5, [%[b], 192]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 184]\n\t" + "ldr x5, [%[b], 184]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 176]\n\t" + "ldr x5, [%[b], 176]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 168]\n\t" + "ldr x5, [%[b], 168]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 160]\n\t" + "ldr x5, [%[b], 160]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 152]\n\t" + "ldr x5, [%[b], 152]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 144]\n\t" + "ldr x5, [%[b], 144]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 136]\n\t" + "ldr x5, [%[b], 136]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 128]\n\t" + "ldr x5, [%[b], 128]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, 
[%[a], 120]\n\t" + "ldr x5, [%[b], 120]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 112]\n\t" + "ldr x5, [%[b], 112]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 104]\n\t" + "ldr x5, [%[b], 104]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 96]\n\t" + "ldr x5, [%[b], 96]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 88]\n\t" + "ldr x5, [%[b], 88]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 80]\n\t" + "ldr x5, [%[b], 80]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 72]\n\t" + "ldr x5, [%[b], 72]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 64]\n\t" + "ldr x5, [%[b], 64]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 56]\n\t" + "ldr x5, [%[b], 56]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, 
[%[a], 48]\n\t" + "ldr x5, [%[b], 48]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 40]\n\t" + "ldr x5, [%[b], 40]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 32]\n\t" + "ldr x5, [%[b], 32]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 24]\n\t" + "ldr x5, [%[b], 24]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 16]\n\t" + "ldr x5, [%[b], 16]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 8]\n\t" + "ldr x5, [%[b], 8]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "ldr x4, [%[a], 0]\n\t" + "ldr x5, [%[b], 0]\n\t" + "and x4, x4, x3\n\t" + "and x5, x5, x3\n\t" + "subs x4, x4, x5\n\t" + "csel %[r], %[one], %[r], hi\n\t" + "csel %[r], x3, %[r], lo\n\t" + "csel x3, x3, xzr, eq\n\t" + "eor %[r], %[r], x3\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b), [one] "r" (one) + : "x2", "x3", "x4", "x5", "x6" + ); +#endif + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_4096_cmp_64(t1, d) >= 0; + sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64(a, m, NULL, r); +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + for (i=63; i>=0; i--) { + r1 = div_4096_word_64(t1[64 + i], t1[64 + i - 1], div); + + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + if (t1[64 + i] != 0) { + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_64(t1, d) >= 0; + sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][128]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 4; + if (c == 64) { + c = 60; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 60; + n <<= 4; + c = 60; + } + 
else if (c < 4) { + y = n >> 60; + n = e[i--]; + c = 4 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 60) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 128; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64U); + if (reduceA != 0) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_64(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_64(t[20], t[10], m, mp); + sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_64(t[22], 
t[11], m, mp); + sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_64(t[24], t[12], m, mp); + sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_64(t[26], t[13], m, mp); + sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_64(t[28], t[14], m, mp); + sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_64(t[30], t[15], m, mp); + sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 5; + if (c == 64) { + c = 59; + } + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 59; + n <<= 5; + c = 59; + } + else if (c < 5) { + y = n >> 59; + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. 
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[128], md[64], rd[128]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + ah = a + 64; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 64; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 64 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e[0] == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 64, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_4096_mod_64_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=63; i>=0; i--) + if (e[0] >> i) + break; + + XMEMCPY(r, a, sizeof(sp_digit) * 64); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_64(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) + sp_4096_mont_mul_64(r, r, a, m, mp); + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_64(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[64 * 2]; + sp_digit pd[32], qd[32], dpd[32]; + sp_digit tmpad[64], tmpbd[64]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + tmp = t; + r = tmp + 64; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; + tmp = a + 64; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(p, 32, pm); + sp_4096_from_mp(q, 32, qm); + sp_4096_from_mp(dp, 32, dpm); + + err = sp_4096_mod_exp_32(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 32, dqm); + err = sp_4096_mod_exp_32(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_4096_sub_in_place_32(tmpa, tmpb); + sp_4096_mask_32(tmp, p, c); + sp_4096_add_32(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 32, qim); + sp_4096_mul_32(tmpa, tmpa, qi); + err = sp_4096_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_32(tmpa, q, tmpa); + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_4096_add_64(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 64 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
 * returns 0 on success, MP_READ_E if there are too many bytes in an array
 * and MEMORY_E if memory allocation fails.
 */
int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
{
    int err = MP_OKAY;
    sp_digit b[128], e[64], m[64];
    /* The result is produced in place, in the same buffer as the base. */
    sp_digit* r = b;
    int expBits = mp_count_bits(exp);

    /* Base and exponent may be at most 4096 bits. */
    if (mp_count_bits(base) > 4096) {
        err = MP_READ_E;
    }

    if (err == MP_OKAY) {
        if (expBits > 4096) {
            err = MP_READ_E;
        }
    }

    /* The modulus must be exactly 4096 bits. */
    if (err == MP_OKAY) {
        if (mp_count_bits(mod) != 4096) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        sp_4096_from_mp(b, 64, base);
        sp_4096_from_mp(e, 64, exp);
        sp_4096_from_mp(m, 64, mod);

        /* reduceA is 0: the base is not reduced before conversion. */
        err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0);
    }

    if (err == MP_OKAY) {
        err = sp_4096_to_mp(r, res);
    }

    /* Clear the local copy of the exponent on every path. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}

#ifdef WOLFSSL_HAVE_SP_DH

#ifdef HAVE_FFDHE_4096
/* Shift the 64-word number a left by n bits, writing 65 words to r.
 *
 * r  Destination. The words at byte offsets 0..512 are written.
 * a  Source. The words at byte offsets 0..504 are read.
 * n  Number of bits to shift by.
 *    NOTE(review): sp_4096_mod_exp_2_64 passes 6-bit window values here;
 *    confirm no caller passes 64 or more.
 *
 * The bits carried out of each word are formed as (word >> 1) >> (63 - n)
 * instead of word >> (64 - n); presumably this keeps n == 0 well defined.
 * Words are handled from the most significant end and every word of a is
 * loaded before the store that could overwrite it, so r may alias a (as in
 * the sp_4096_lshift_64(r, r, y) call made by sp_4096_mod_exp_2_64).
 */
static void sp_4096_lshift_64(sp_digit* r, sp_digit* a, byte n)
{
    __asm__ __volatile__ (
        /* x6 = 63 - n, the second stage of the carry-out right shift. */
        "mov	x6, 63\n\t"
        "sub	x6, x6, %[n]\n\t"
        /* Top word: x4 becomes the extra word r[64]. */
        "ldr	x3, [%[a], 504]\n\t"
        "lsr	x4, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x4, x4, x6\n\t"
        /* Remaining words: x2/x3/x4 rotate as the working registers and x5
         * carries the bits shifted out of the word below. */
        "ldr	x2, [%[a], 496]\n\t"
        "str	x4, [%[r], 512]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 488]\n\t"
        "str	x3, [%[r], 504]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 480]\n\t"
        "str	x2, [%[r], 496]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 472]\n\t"
        "str	x4, [%[r], 488]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 464]\n\t"
        "str	x3, [%[r], 480]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 456]\n\t"
        "str	x2, [%[r], 472]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 448]\n\t"
        "str	x4, [%[r], 464]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 440]\n\t"
        "str	x3, [%[r], 456]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 432]\n\t"
        "str	x2, [%[r], 448]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 424]\n\t"
        "str	x4, [%[r], 440]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 416]\n\t"
        "str	x3, [%[r], 432]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 408]\n\t"
        "str	x2, [%[r], 424]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 400]\n\t"
        "str	x4, [%[r], 416]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 392]\n\t"
        "str	x3, [%[r], 408]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 384]\n\t"
        "str	x2, [%[r], 400]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 376]\n\t"
        "str	x4, [%[r], 392]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 368]\n\t"
        "str	x3, [%[r], 384]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 360]\n\t"
        "str	x2, [%[r], 376]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 352]\n\t"
        "str	x4, [%[r], 368]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 344]\n\t"
        "str	x3, [%[r], 360]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 336]\n\t"
        "str	x2, [%[r], 352]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 328]\n\t"
        "str	x4, [%[r], 344]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 320]\n\t"
        "str	x3, [%[r], 336]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 312]\n\t"
        "str	x2, [%[r], 328]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 304]\n\t"
        "str	x4, [%[r], 320]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 296]\n\t"
        "str	x3, [%[r], 312]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 288]\n\t"
        "str	x2, [%[r], 304]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 280]\n\t"
        "str	x4, [%[r], 296]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 272]\n\t"
        "str	x3, [%[r], 288]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 264]\n\t"
        "str	x2, [%[r], 280]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 256]\n\t"
        "str	x4, [%[r], 272]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 248]\n\t"
        "str	x3, [%[r], 264]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 240]\n\t"
        "str	x2, [%[r], 256]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 232]\n\t"
        "str	x4, [%[r], 248]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 224]\n\t"
        "str	x3, [%[r], 240]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 216]\n\t"
        "str	x2, [%[r], 232]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 208]\n\t"
        "str	x4, [%[r], 224]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 200]\n\t"
        "str	x3, [%[r], 216]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 192]\n\t"
        "str	x2, [%[r], 208]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 184]\n\t"
        "str	x4, [%[r], 200]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 176]\n\t"
        "str	x3, [%[r], 192]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 168]\n\t"
        "str	x2, [%[r], 184]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 160]\n\t"
        "str	x4, [%[r], 176]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 152]\n\t"
        "str	x3, [%[r], 168]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 144]\n\t"
        "str	x2, [%[r], 160]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 136]\n\t"
        "str	x4, [%[r], 152]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 128]\n\t"
        "str	x3, [%[r], 144]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 120]\n\t"
        "str	x2, [%[r], 136]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 112]\n\t"
        "str	x4, [%[r], 128]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 104]\n\t"
        "str	x3, [%[r], 120]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 96]\n\t"
        "str	x2, [%[r], 112]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 88]\n\t"
        "str	x4, [%[r], 104]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 80]\n\t"
        "str	x3, [%[r], 96]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 72]\n\t"
        "str	x2, [%[r], 88]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 64]\n\t"
        "str	x4, [%[r], 80]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 56]\n\t"
        "str	x3, [%[r], 72]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 48]\n\t"
        "str	x2, [%[r], 64]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 40]\n\t"
        "str	x4, [%[r], 56]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 32]\n\t"
        "str	x3, [%[r], 48]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 24]\n\t"
        "str	x2, [%[r], 40]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        "ldr	x2, [%[a], 16]\n\t"
        "str	x4, [%[r], 32]\n\t"
        "lsr	x5, x2, 1\n\t"
        "lsl	x2, x2, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x3, x3, x5\n\t"
        "ldr	x4, [%[a], 8]\n\t"
        "str	x3, [%[r], 24]\n\t"
        "lsr	x5, x4, 1\n\t"
        "lsl	x4, x4, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x2, x2, x5\n\t"
        "ldr	x3, [%[a], 0]\n\t"
        "str	x2, [%[r], 16]\n\t"
        "lsr	x5, x3, 1\n\t"
        "lsl	x3, x3, %[n]\n\t"
        "lsr	x5, x5, x6\n\t"
        "orr	x4, x4, x5\n\t"
        /* Lowest two result words. */
        "str	x3, [%[r]]\n\t"
        "str	x4, [%[r], 8]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
        : "memory", "x2", "x3", "x4", "x5", "x6"
    );
}

/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * r     A single precision number that is the result of the operation.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
+ */ +static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) { + c = 64; + } + c -= bits % 6; + if (c == 64) { + c = 58; + } + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = n >> 58; + n <<= 6; + c = 58; + } + else if (c < 6) { + y = n >> 58; + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. 
+ * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_bin(e, 64, exp, expLen); + sp_4096_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) + err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m); + else + #endif + err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index bbab4605d..c6505a66b 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -730,7 +730,7 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 
16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Square a and put result in r. (r = a * a) @@ -757,7 +757,7 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Sub b from a into r. (r = a - b) @@ -1119,7 +1119,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Square a and put result in r. (r = a * a) @@ -1146,7 +1146,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Sub b from a into r. (r = a - b) @@ -1777,7 +1777,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } /* Square a and put result in r. 
(r = a * a) @@ -1804,7 +1804,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -6000,7 +6000,7 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Square a and put result in r. (r = a * a) @@ -6027,7 +6027,7 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Sub b from a into r. (r = a - b) @@ -6530,7 +6530,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Square a and put result in r. (r = a * a) @@ -6557,7 +6557,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Sub b from a into r. (r = a - b) @@ -7455,7 +7455,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } /* Square a and put result in r. 
(r = a * a) @@ -7482,7 +7482,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -11133,6 +11133,4510 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */
+#ifdef WOLFSSL_SP_4096
+/* Read big endian unsigned byte array into r.
+ *
+ * The last byte of a is the least significant and goes into the low bits
+ * of r[0]; bytes are packed upwards from there, 32 bits per digit.
+ *
+ * r     A single precision integer.
+ * size  Number of digits in r. Conversion stops once they are full and any
+ *       digits not reached are set to zero.
+ * a     Byte array.
+ * n     Number of bytes in array to read.
+ */
+static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
+{
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = n-1; i >= 0; i--) {
+        /* s is the bit position inside digit j for the next byte. */
+        r[j] |= (((sp_digit)a[i]) << s);
+        if (s >= 24U) {
+            /* Digit j now holds 32 bits: trim it and move on. */
+            r[j] &= 0xffffffff;
+            s = 32U - s;
+            if (j + 1 >= size) {
+                break;
+            }
+            /* Start the next digit with the bits of this byte that did not
+             * fit in the previous one. */
+            r[++j] = (sp_digit)a[i] >> s;
+            s = 8U - s;
+        }
+        else {
+            s += 8U;
+        }
+    }
+
+    /* Zero the digits above the last one written. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+}
+
+/* Convert an mp_int to an array of sp_digit.
+ *
+ * r     A single precision integer.
+ * size  Number of digits in r; digits not filled from a are set to zero.
+ * a     A multi-precision integer.
+ */
+static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
+{
+#if DIGIT_BIT == 32
+    /* mp digits are already 32 bits wide: copy them and zero the rest. */
+    int j;
+
+    /* NOTE(review): copies a->used digits with no check against size, so
+     * this relies on the caller guaranteeing a->used <= size - confirm. */
+    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
+
+    for (j = a->used; j < size; j++) {
+        r[j] = 0;
+    }
+#elif DIGIT_BIT > 32
+    /* mp digits are wider than 32 bits: each one is split over several
+     * 32-bit output digits. */
+    int i, j = 0;
+    word32 s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        /* s is the bit position inside r[j] where a->dp[i] starts. */
+        r[j] |= ((sp_digit)a->dp[i] << s);
+        r[j] &= 0xffffffff;
+        s = 32U - s;
+        if (j + 1 >= size) {
+            break;
+        }
+        /* lint allow cast of mismatch word32 and mp_digit */
+        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+        /* Keep emitting whole 32-bit pieces while the mp digit has them. */
+        while ((s + 32U) <= (word32)DIGIT_BIT) {
+            s += 32U;
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            if (s < (word32)DIGIT_BIT) {
+                /* lint allow cast of mismatch word32 and mp_digit */
+                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
+            }
+            else {
+                r[++j] = 0L;
+            }
+        }
+        /* Bits of this mp digit already placed in the current r[j]. */
+        s = (word32)DIGIT_BIT - s;
+    }
+
+    /* Zero the digits above the last one written. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#else
+    /* mp digits are narrower than 32 bits: several are packed into each
+     * output digit. */
+    int i, j = 0, s = 0;
+
+    r[0] = 0;
+    for (i = 0; i < a->used && j < size; i++) {
+        r[j] |= ((sp_digit)a->dp[i]) << s;
+        if (s + DIGIT_BIT >= 32) {
+            /* r[j] is full: trim it and carry the remaining bits of this
+             * mp digit, if any, into the next output digit. */
+            r[j] &= 0xffffffff;
+            if (j + 1 >= size) {
+                break;
+            }
+            s = 32 - s;
+            if (s == DIGIT_BIT) {
+                r[++j] = 0;
+                s = 0;
+            }
+            else {
+                r[++j] = a->dp[i] >> s;
+                s = DIGIT_BIT - s;
+            }
+        }
+        else {
+            s += DIGIT_BIT;
+        }
+    }
+
+    /* Zero the digits above the last one written. */
+    for (j++; j < size; j++) {
+        r[j] = 0;
+    }
+#endif
+}
+
+/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512
+ *
+ * r  A single precision integer.
+ * a  Byte array. Must have room for 512 bytes.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    /* Start at the last byte of the output: r[0] is written there, so the
+     * least significant digit ends up at the end of a. */
+    j = 4096 / 8 - 1;
+    a[j] = 0;
+    for (i=0; i<128 && j>=0; i++) {
+        /* b counts the bits of r[i] emitted so far; s is the bit offset
+         * applied to the first byte taken from this digit. */
+        b = 0;
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        if (j < 0) {
+            break;
+        }
+        while (b < 32) {
+            a[j--] = r[i] >> b; b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 32);
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        /* Step back one byte so the next digit's first write (an OR)
+         * lands on the byte just finished. */
+        if (s != 0) {
+            j++;
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Add b to a into r. (r = a + b)
+ *
+ * The addition is unrolled over 64 digits in two runs of 32.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ * returns the carry out of the most significant digit.
+ */
+SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit c = 0;
+
+    __asm__ __volatile__ (
+        /* r7 = all ones; added to the saved carry further down to put it
+         * back into the carry flag after the pointers are advanced. */
+        "mov r7, #0\n\t"
+        "mvn r7, r7\n\t"
+        /* First digit uses add, every later digit adc to chain the carry. */
+        "ldr r4, [%[a], #0]\n\t"
+        "ldr r5, [%[b], #0]\n\t"
+        "add r4, r5\n\t"
+        "str r4, [%[r], #0]\n\t"
+        "ldr r4, [%[a], #4]\n\t"
+        "ldr r5, [%[b], #4]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #4]\n\t"
+        "ldr r4, [%[a], #8]\n\t"
+        "ldr r5, [%[b], #8]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #8]\n\t"
+        "ldr r4, [%[a], #12]\n\t"
+        "ldr r5, [%[b], #12]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #12]\n\t"
+        "ldr r4, [%[a], #16]\n\t"
+        "ldr r5, [%[b], #16]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #16]\n\t"
+        "ldr r4, [%[a], #20]\n\t"
+        "ldr r5, [%[b], #20]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #20]\n\t"
+        "ldr r4, [%[a], #24]\n\t"
+        "ldr r5, [%[b], #24]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #24]\n\t"
+        "ldr r4, [%[a], #28]\n\t"
+        "ldr r5, [%[b], #28]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #28]\n\t"
+        "ldr r4, [%[a], #32]\n\t"
+        "ldr r5, [%[b], #32]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #32]\n\t"
+        "ldr r4, [%[a], #36]\n\t"
+        "ldr r5, [%[b], #36]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #36]\n\t"
+        "ldr r4, [%[a], #40]\n\t"
+        "ldr r5, [%[b], #40]\n\t"
+        "adc r4, r5\n\t"
+        "str r4, [%[r], #40]\n\t"
+        "ldr r4, [%[a], #44]\n\t"
+        "ldr r5, [%[b], 
#44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, 
r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + 
"str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sub r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, 
[%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + 
"ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], 
#88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, 
[%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + 
"sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" 
+ "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ * + * The 128 words are handled as four unrolled passes of 32 words. Between + * passes the carry flag is captured in c and then regenerated by adding r7 + * (all ones) to c, presumably because the pointer updates in between change + * the flags. + * + * returns the carry out of the most significant word (0 or 1). + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "add r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4,
r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc
r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" +
"ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r],
#32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a],
#112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #0x80\n\t" + "add %[b], #0x80\n\t" + "add %[r], #0x80\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a],
#56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + );
+ + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * The product is produced one result word at a time: for each result word the + * matching a[i] * b[j] terms are formed from 16x16-bit multiplies and summed + * into the three-register accumulator r3:r4:r5. The 128-word result is built + * in a local buffer and copied to r once complete. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[64 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" +
"str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* AND m into each word of a and store in r. + * + * 64 digits are processed; the non-small build unrolls the loop by eight. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Multiply a and b into r. (r = a * b) + * + * Splits each operand into two 64-word halves and combines three half-size + * products: z0 = low times low, z2 = high times high and z1 = the product of + * the two half sums, from which z2 and z0 are then subtracted. ca and cb hold + * the carries of the two half sums; their contribution is added in at + * r + 128 and tracked in u, which is stored in r[192]. + * NOTE(review): the 64-word helpers called here are the sp_2048_ versions, + * which are defined outside this section - confirm they are always built + * when this code is. + * + * r A single precision integer. Must have room for 256 digits. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit b1[64]; + sp_digit z2[128]; + sp_digit u, ca, cb; + + ca = sp_2048_add_64(a1, a, &a[64]); + cb = sp_2048_add_64(b1, b, &b[64]); + u = ca & cb; + sp_2048_mul_64(z1, a1, b1); + sp_2048_mul_64(z2, &a[64], &b[64]); + sp_2048_mul_64(z0, a, b); + sp_2048_mask_64(r + 128, a1, 0 - cb); + sp_2048_mask_64(b1, b1, 0 - ca); + u += sp_2048_add_64(r + 128, r + 128, b1); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer.
+ * + * Computes one result word at a time: each product of two different words of + * a is accumulated twice and each a[i] * a[i] term once. The result is built + * in 512 bytes reserved by lowering sp inside the asm, copied to r at the + * end, and sp is then raised again by the same amount. + */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6,
#16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * Splits a into two 64-word halves and combines three half-size squares: + * z0 = low squared, z2 = high squared and z1 = (low + high) squared, from + * which z2 and z0 are then subtracted. u starts as the carry of low + high + * and, after collecting the later carries and borrows, is stored in r[192]. + * NOTE(review): the 64-word helpers called here are the sp_2048_ versions, + * which are defined outside this section - confirm they are always built + * when this code is. + * + * r A single precision integer. Must have room for 256 digits. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); + sp_2048_sqr_64(z1, a1); + sp_2048_sqr_64(z2, &a[64]); + sp_2048_sqr_64(z0, a); + sp_2048_mask_64(r + 128, a1, 0 - u); + u += sp_2048_add_64(r + 128, r + 128, r + 128); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r.
(r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * + * One word is added per loop iteration over 512 bytes of a. The carry is kept + * in c between iterations and moved back into the carry flag by adding r7 + * (all ones) to c at the top of each pass. + * + * returns the carry out of the most significant word (0 or 1). + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "mov r4, #2\n\t" + "lsl r4, #8\n\t" + "sub r7, #1\n\t" + "add r6, r4\n\t" + "\n1:\n\t" + "add %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adc r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "add %[a], #4\n\t" + "add %[b], #4\n\t" + "add %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * Two words are subtracted per loop iteration over 512 bytes of a. The borrow + * is kept in c as 0 or all ones and moved back into the flags by computing + * 0 - c at the top of each pass. + * + * a A single precision integer. + * b A single precision integer. + * + * returns 0 when there was no borrow out of the top word, all ones otherwise. + */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "mov r5, #2\n\t" + "lsl r5, #8\n\t" + "add r7, r5\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbc r3, r5\n\t" + "sbc r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c]\n\t" + "add %[a], #8\n\t" + "add %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ * + * The product is produced one result word at a time: for each result word the + * matching a[i] * b[j] terms are formed from 16x16-bit multiplies and summed + * into the three-register accumulator r3:r4:r5. The 256-word result is built + * in a local buffer and copied to r once complete. + */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[128 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[a]\n\t" + "add %[a], r9\n\t" + "add %[b], r10\n\t" + "\n2:\n\t" + "# Multiply Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [%[b]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply Done\n\t" + "add %[a], #4\n\t" + "sub %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + :
"memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * Computes one result word at a time: each product of two different words of + * a is accumulated twice and each a[i] * a[i] term once. The result is built + * in 1024 bytes reserved by lowering sp inside the asm, copied to r at the + * end, and sp is then raised again by the same amount. + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #252\n\t" + "mov %[a], r8\n\t" + "sub %[a], r6\n\t" + "sbc r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, %[a]\n\t" + "add %[a], r9\n\t" + "add r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + "# Multiply * 2: Start\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r7, [r2]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Multiply * 2: Done\n\t" + "bal 5f\n\t" + "\n4:\n\t" + "# Square: Start\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t"
+ "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r6\n\t" + "add r3, r6\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "mul r7, r7\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# Square: Done\n\t" + "\n5:\n\t" + "add %[a], #4\n\t" + "sub r2, #4\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #3\n\t" + "lsl r3, r3, #8\n\t" + "add r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "sub r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "add sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Calculate the bottom digit of -1/a mod 2^n. + * + * Starting from a 4-bit inverse of a[0], each x *= 2 - b * x step doubles the + * number of correct low bits (8, 16, then 32); the result is then negated. + * + * a A single precision number. Only a[0] is read. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer.
+ * a A single precision integer. + * b A single precision digit. + * + * The loop stores one result word for each of the 128 words of a and the + * final str after the loop writes one more, so r receives 129 digits. + */ +SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "# A[] * B\n\t" + "ldr r6, [%[a]]\n\t" + "lsl r6, r6, #16\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r3, r7\n\t" + "adc r4, %[r]\n\t" + "adc r5, %[r]\n\t" + "lsr r7, %[b], #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "ldr r6, [%[a]]\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, %[b], #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc r5, %[r]\n\t" + "lsl r7, %[b], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r3, r6\n\t" + "adc r4, r7\n\t" + "adc r5, %[r]\n\t" + "# A[] * B - Done\n\t" + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], #4\n\t" + "add %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r is cleared to zero and m is then subtracted from it in place; the borrow + * returned by the subtraction is ignored. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 128); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_128(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract b and 0 to store a in r unchanged.
+ * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + * + * Each word of b is ANDed with m before being subtracted, so the same + * instructions run whichever mask is passed. 512 bytes are processed. + * + * returns 0 when there was no borrow out of the top word, all ones otherwise. + */ +SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #2\n\t" + "lsl r5, r5, #8\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbc r5, r6\n\t" + "sbc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n.
+ */ +SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + /* NOTE(review): %[m] and %[mp] are declared as input-only operands but are + * written inside the asm (m is restored from r14 at the end, mp is not) - + * confirm this is acceptable for the compilers in use. */ + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + "# i = 0\n\t" + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + "# mu = a[i] * mp\n\t" + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + "# a[i+j] += m[j] * mu\n\t" + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "# Multiply m[j] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add %[a], r7\n\t" + "adc r5, %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add %[a], r6\n\t" + "adc r5, r7\n\t" + "# Multiply m[j] and mu - Done\n\t" + "add r4, %[a]\n\t" + "adc r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], #4\n\t" + "add r10, r6\n\t" + "mov r4, #1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, #252\n\t" + "add r4, r9\n\t" + "cmp r10, r4\n\t" + "blt 2b\n\t" + "# a[i+127] += m[127] * mu\n\t" + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + "# Multiply m[127] and mu - Start\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r6, %[mp], #16\n\t" + "lsl r7, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r5, r7\n\t" + "adc r4, %[ca]\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r7, r7, #16\n\t" +
"mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsr r6, %[mp], #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6\n\t" + "add r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "ldr r7, [%[m]]\n\t" + "lsl r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "add r5, r6\n\t" + "adc r4, r7\n\t" + "adc %[a], %[ca]\n\t" + "# Multiply m[127] and mu - Done\n\t" + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "add r5, %[a]\n\t" + "adc r7, r4\n\t" + "adc %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + "# i += 1\n\t" + "mov r6, #4\n\t" + "add r9, r6\n\t" + "add r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #2\n\t" + "lsl r4, r4, #8\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + /* The asm leaves a advanced by 128 digits, so a - 128 is the start of the + * buffer: the upper half minus (m masked by 0 - ca) is written there. */ + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_128(r, a, b); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier.
+ */ +static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_128(r, a); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + /* NOTE(review): %[d1] and %[d0] are declared as input-only operands but are + * modified inside the asm - confirm this is acceptable for the compilers + * in use. */ + __asm__ __volatile__ ( + "lsr r5, %[div], #1\n\t" + "add r5, #1\n\t" + "mov r8, %[d0]\n\t" + "mov r9, %[d1]\n\t" + "# Do top 32\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "# Next 30 bits\n\t" + "mov r4, #29\n\t" + "1:\n\t" + "lsl %[d0], %[d0], #1\n\t" + "adc %[d1], %[d1]\n\t" + "mov r6, r5\n\t" + "sub r6, %[d1]\n\t" + "sbc r6, r6\n\t" + "add %[r], %[r]\n\t" + "sub %[r], r6\n\t" + "and r6, r5\n\t" + "sub %[d1], r6\n\t" + "sub r4, #1\n\t" + "bpl 1b\n\t" + "mov r7, #0\n\t" + "add %[r], %[r]\n\t" + "add %[r], #1\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "sub %[d1], r4\n\t" + "mov r4, %[d1]\n\t" + "mov %[d1], r9\n\t" + "sbc %[d1], r5\n\t" + "mov r5, %[d1]\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" +
"lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "# r * div - Start\n\t" + "lsl %[d1], %[r], #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr %[d1], %[d1], #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, %[d1]\n\t" + "lsr r6, %[div], #16\n\t" + "mul %[d1], r6\n\t" + "lsr r5, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r7\n\t" + "lsr %[d1], %[r], #16\n\t" + "mul r6, %[d1]\n\t" + "add r5, r6\n\t" + "lsl r6, %[div], #16\n\t" + "lsr r6, r6, #16\n\t" + "mul %[d1], r6\n\t" + "lsr r6, %[d1], #16\n\t" + "lsl %[d1], %[d1], #16\n\t" + "add r4, %[d1]\n\t" + "adc r5, r6\n\t" + "# r * div - Done\n\t" + "mov %[d1], r8\n\t" + "mov r6, r9\n\t" + "sub r4, %[d1], r4\n\t" + "sbc r6, r5\n\t" + "mov r5, r6\n\t" + "add %[r], r5\n\t" + "mov r6, %[div]\n\t" + "sub r6, r4\n\t" + "sbc r6, r6\n\t" + "sub %[r], r6\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r7", "r6", "r8", "r9" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit.
+ */ +static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<128; i++) { + r[i] = a[i] & m; + } +#else + int i; + + /* Same operation as the small build, unrolled eight digits at a time. */ + for (i = 0; i < 128; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * Words are compared from the most significant down; once a difference has + * been seen the mask in r3 is cleared so that lower words no longer change + * the result. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, #252\n\t" + "1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r3\n\t" + "and r5, r3\n\t" + "mov r4, r7\n\t" + "sub r7, r5\n\t" + "sbc r7, r7\n\t" + "add %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r5, r4\n\t" + "sbc r7, r7\n\t" + "sub %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r7\n\t" + "sub r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Number to be divided (256 digits are read). + * d Number to divide with. + * m Multiplier result (unused). + * r Remainder from the division. + * returns MP_OKAY indicating success.
+ */ +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + /* Estimate the next quotient digit from the top two remainder words. */ + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + /* Add d back up to twice, each time masked by the top word - a + * branch-free correction, presumably for an over-estimated digit. */ + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + } + + /* Subtract d once more if the remainder is still >= d. */ + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced (256 digits are read). + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128(a, m, NULL, r); +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success.
+ */ +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + if (t1[128 + i] != 0) { + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + if (t1[128 + i] != 0) + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n 
<<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + 
sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. 
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[256], md[128], rd[256]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + /* One allocation of 5 * 128 digits: a (256), r (256) and m (128). */ + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + ah = a + 128; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 128; +#endif + + if (err == MP_OKAY) { + /* The input is loaded into the upper half of a. */ + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e[0] == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + /* Exponent 3: square then multiply, each followed by a full reduction. */ + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form: with the lower half of a cleared and the + * input in the upper half (ah), reducing a mod m gives in * 2^4096 mod m.
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 128); + err = sp_4096_mod_128_cond(a, a, m); + + if (err == MP_OKAY) { + /* Find the top set bit of e, then square-and-multiply over the + * bits below it. */ + for (i=31; i>=0; i--) + if (e[0] >> i) + break; + + XMEMCPY(r, a, sizeof(sp_digit) * 128); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_128(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) + sp_4096_mont_mul_128(r, r, a, m, mp); + } + /* Convert out of Montgomery form. */ + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128); + sp_4096_mont_reduce_128(r, m, mp); + + /* Subtract m once if the result is still >= m, comparing from + * the top digit down. */ + for (i = 127; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_128(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[128 * 2]; + sp_digit pd[64], qd[64], dpd[64]; + sp_digit tmpad[128], tmpbd[128]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + tmp = t; + r = tmp + 128; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; + tmp = a + 128; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_4096_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_4096_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_4096_sub_in_place_64(tmpa, tmpb); + sp_4096_mask_64(tmp, p, c); + sp_4096_add_64(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 64, qim); + sp_4096_mul_64(tmpa, tmpa, qi); + err = sp_4096_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
+ * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + /* b is double width: r aliases it and sp_4096_mod_exp_128 uses 256 digits + * of r. */ + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + /* reduceA is 0: base is passed on without a prior reduction mod m. */ + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + /* Clear the local copy of the exponent before returning. */ + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #255\n\t" + "add %[r], %[r], #255\n\t" + "add %[a], %[a], #193\n\t" + "add %[r], %[r], #193\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr
r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + 
"ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" 
+ "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], 
#16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, 
[%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str 
r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + 
"str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" 
+ "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" 
+ "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, 
#1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[256]; + sp_digit td[129]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 256; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + sp_4096_lshift_128(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_lshift_128(r, r, y); + sp_4096_mul_d_128(tmp, norm, r[128]); + r[128] = 0; + o = sp_4096_add_128(r, r, tmp); + sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent.
+ * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result after leading zero bytes are removed. + * returns 0 on success, MP_READ_E if base has more than 4096 bits, exp has more than + * 512 bytes or mod is not exactly 4096 bits, and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_bin(e, 128, exp, expLen); + sp_4096_from_mp(m, 128, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1) + err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m); + else + #endif + err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 743b33b93..2a23a0686 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -1800,6 +1800,8 @@ static int sp_2048_div_45(const sp_digit* a, const sp_digit* d, sp_digit* m, } #endif + (void)m; + if (err == MP_OKAY) { #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) t1 = td; @@ -1811,8 +1813,8
@@ static int sp_2048_div_45(const sp_digit* a, const sp_digit* d, sp_digit* m, sd = sdd; #endif - sp_2048_mul_d_45(sd, d, 1 << 11); - sp_2048_mul_d_90(t1, a, 1 << 11); + sp_2048_mul_d_45(sd, d, 1L << 11); + sp_2048_mul_d_90(t1, a, 1L << 11); dv = sd[44]; for (i=45; i>=0; i--) { t1[45 + i] += t1[45 + i - 1] >> 23; @@ -2727,6 +2729,8 @@ static int sp_2048_div_90(const sp_digit* a, const sp_digit* d, sp_digit* m, } #endif + (void)m; + if (err == MP_OKAY) { #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) t1 = td; @@ -2738,8 +2742,8 @@ static int sp_2048_div_90(const sp_digit* a, const sp_digit* d, sp_digit* m, sd = sdd; #endif - sp_2048_mul_d_90(sd, d, 1 << 22); - sp_2048_mul_d_180(t1, a, 1 << 22); + sp_2048_mul_d_90(sd, d, 1L << 22); + sp_2048_mul_d_180(t1, a, 1L << 22); dv = sd[89]; for (i=90; i>=0; i--) { t1[90 + i] += t1[90 + i - 1] >> 23; @@ -6587,6 +6591,8 @@ static int sp_3072_div_134(const sp_digit* a, const sp_digit* d, sp_digit* m, } #endif + (void)m; + if (err == MP_OKAY) { #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) t1 = td; @@ -6598,8 +6604,8 @@ static int sp_3072_div_134(const sp_digit* a, const sp_digit* d, sp_digit* m, sd = sdd; #endif - sp_3072_mul_d_134(sd, d, 1 << 10); - sp_3072_mul_d_268(t1, a, 1 << 10); + sp_3072_mul_d_134(sd, d, 1L << 10); + sp_3072_mul_d_268(t1, a, 1L << 10); dv = sd[133]; for (i=134; i>=0; i--) { t1[134 + i] += t1[134 + i - 1] >> 23; @@ -8424,6 +8430,4011 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer (21 bits per digit, least significant digit first). + * size Maximum number of digits to write into r; unused digits are zeroed. + * a Byte array. + * n Number of bytes in array to read.
+ */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 13U) { + r[j] &= 0x1fffff; + s = 21U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer (21 bits per digit). + * size Maximum number of digits to write into r; unused digits are zeroed. + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 21 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 21 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x1fffff; + s = 21U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 21U) <= (word32)DIGIT_BIT) { + s += 21U; + r[j] &= 0x1fffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 21) { + r[j] &= 0x1fffff; + if (j + 1 >= size) { + break; + } + s = 21 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array.
+ * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. + */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + for (i=0; i<195; i++) { + r[i+1] += r[i] >> 21; + r[i] &= 0x1fffff; + } + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<196 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/ + if (j < 0) { + break; + } + while (b < 21) { + a[j--] = r[i] >> b; b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 21); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 49 digits. + * b A single precision integer of 49 digits. + */ +SP_NOINLINE static void sp_4096_mul_49(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j; + int64_t t[98]; + + XMEMSET(t, 0, sizeof(t)); + for (i=0; i<49; i++) { + for (j=0; j<49; j++) { + t[i+j] += ((int64_t)a[i]) * b[j]; + } + } + for (i=0; i<97; i++) { + r[i] = t[i] & 0x1fffff; + t[i+1] += t[i] >> 21; + } + r[97] = (sp_digit)t[97]; +} + +/* Square a and put result in r; each cross product a[i]*a[j] with j below i is added twice. (r = a * a) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 49 digits. + */ +SP_NOINLINE static void sp_4096_sqr_49(sp_digit* r, const sp_digit* a) +{ + int i, j; + int64_t t[98]; + + XMEMSET(t, 0, sizeof(t)); + for (i=0; i<49; i++) { + for (j=0; j < i; j++) { + t[i+j] += (((int64_t)a[i]) * a[j]) * 2; + } + t[i+i] += ((int64_t)a[i]) * a[i]; + } + for (i=0; i<97; i++) { + r[i] = t[i] & 0x1fffff; + t[i+1] += t[i] >> 21; + } + r[97] = (sp_digit)t[97]; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +SP_NOINLINE static int sp_4096_add_49(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 48; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[48] = a[48] + b[48]; + + return 0; +} + +/* Add b to a into r, digit by digit without carry propagation; always returns 0. (r = a + b) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 98 digits. + * b A single precision integer of 98 digits. + */ +SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[96] = a[96] + b[96]; + r[97] = a[97] + b[97]; + + return 0; +} + +/* Sub b from a into r, digit by digit without borrow propagation; always returns 0. (r = a - b) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 98 digits. + * b A single precision integer of 98 digits. + */ +SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[96] = a[96] - b[96]; + r[97] = a[97] - b[97]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[98]; + sp_digit* a1 = z1; + sp_digit b1[49]; + sp_digit* z2 = r + 98; + (void)sp_4096_add_49(a1, a, &a[49]); + (void)sp_4096_add_49(b1, b, &b[49]); + sp_4096_mul_49(z2, &a[49], &b[49]); + sp_4096_mul_49(z0, a, b); + sp_4096_mul_49(z1, a1, b1); + (void)sp_4096_sub_98(z1, z1, z2); + (void)sp_4096_sub_98(z1, z1, z0); + (void)sp_4096_add_98(r + 49, r + 49, z1); +} + +/* Square a and put result in r, using three 49-digit squarings (low half, high half, sum of halves). (r = a * a) + * + * r A single precision integer; 196 digits are written. + * a A single precision integer of 98 digits. + */ +SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[98]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 98; + (void)sp_4096_add_49(a1, a, &a[49]); + sp_4096_sqr_49(z2, &a[49]); + sp_4096_sqr_49(z0, a); + sp_4096_sqr_49(z1, a1); + (void)sp_4096_sub_98(z1, z1, z2); + (void)sp_4096_sub_98(z1, z1, z0); + (void)sp_4096_add_98(r + 49, r + 49, z1); +} + +/* Add b to a into r, digit by digit without carry propagation; always returns 0. (r = a + b) + * + * r A single precision integer; 196 digits are written. + * a A single precision integer of 196 digits. + * b A single precision integer of 196 digits. + */ +SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[192] = a[192] + b[192]; + r[193] = a[193] + b[193]; + r[194] = a[194] + b[194]; + r[195] = a[195] + b[195]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[192] = a[192] - b[192]; + r[193] = a[193] - b[193]; + r[194] = a[194] - b[194]; + r[195] = a[195] - b[195]; + + return 0; +} + +/* Multiply a and b into r, using three 98-digit multiplications (low halves, high halves, sums of halves). (r = a * b) + * + * r A single precision integer; 392 digits are written. + * a A single precision integer of 196 digits. + * b A single precision integer of 196 digits. + */ +SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[196]; + sp_digit* a1 = z1; + sp_digit b1[98]; + sp_digit* z2 = r + 196; + (void)sp_4096_add_98(a1, a, &a[98]); + (void)sp_4096_add_98(b1, b, &b[98]); + sp_4096_mul_98(z2, &a[98], &b[98]); + sp_4096_mul_98(z0, a, b); + sp_4096_mul_98(z1, a1, b1); + (void)sp_4096_sub_196(z1, z1, z2); + (void)sp_4096_sub_196(z1, z1, z0); + (void)sp_4096_add_196(r + 98, r + 98, z1); +} + +/* Square a and put result in r, using three 98-digit squarings (low half, high half, sum of halves). (r = a * a) + * + * r A single precision integer; 392 digits are written. + * a A single precision integer of 196 digits. + */ +SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[196]; + sp_digit* a1 = z1; + sp_digit* z2 = r + 196; + (void)sp_4096_add_98(a1, a, &a[98]); + sp_4096_sqr_98(z2, &a[98]); + sp_4096_sqr_98(z0, a); + sp_4096_sqr_98(z1, a1); + (void)sp_4096_sub_196(z1, z1, z2); + (void)sp_4096_sub_196(z1, z1, z0); + (void)sp_4096_add_196(r + 98, r + 98, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r, digit by digit without borrow propagation; always returns 0. (r = a - b) + * + * r A single precision integer; 196 digits are written. + * a A single precision integer of 196 digits. + * b A single precision integer of 196 digits. + */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r, summing one result column (i + j == k) at a time from the top down. (r = a * b) + * + * r A single precision integer; 392 digits are written. + * a A single precision integer of 196 digits. + * b A single precision integer of 196 digits. + */ +SP_NOINLINE static void sp_4096_mul_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[195]) * b[195]; + r[391] = (sp_digit)(c >> 21); + c = (c & 0x1fffff) << 21; + for (k = 389; k >= 0; k--) { + for (i = 195; i >= 0; i--) { + j = k - i; + if (j >= 196) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 42; + r[k + 1] = (c >> 21) & 0x1fffff; + c = (c & 0x1fffff) << 21; + } + r[0] = (sp_digit)(c >> 21); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer.
+ */ +SP_NOINLINE static void sp_4096_sqr_196(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[195]) * a[195]; + r[391] = (sp_digit)(c >> 21); + c = (c & 0x1fffff) << 21; + for (k = 389; k >= 0; k--) { + for (i = 195; i >= 0; i--) { + j = k - i; + if (j >= 196 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int64_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 42; + r[k + 1] = (c >> 21) & 0x1fffff; + c = (c & 0x1fffff) << 21; + } + r[0] = (sp_digit)(c >> 21); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r, digit by digit without carry propagation; always returns 0. (r = a + b) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 98 digits. + * b A single precision integer of 98 digits. + */ +SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r, digit by digit without borrow propagation; always returns 0. (r = a - b) + * + * r A single precision integer; 98 digits are written. + * a A single precision integer of 98 digits. + * b A single precision integer of 98 digits. + */ +SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer.
+ */ +SP_NOINLINE static void sp_4096_mul_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[97]) * b[97]; + r[195] = (sp_digit)(c >> 21); + c = (c & 0x1fffff) << 21; + for (k = 193; k >= 0; k--) { + for (i = 97; i >= 0; i--) { + j = k - i; + if (j >= 98) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * b[j]; + } + r[k + 2] += c >> 42; + r[k + 1] = (c >> 21) & 0x1fffff; + c = (c & 0x1fffff) << 21; + } + r[0] = (sp_digit)(c >> 21); +} + +/* Square a and put result in r; cross products are doubled, the diagonal term is added once. (r = a * a) + * + * r A single precision integer; 196 digits are written. + * a A single precision integer of 98 digits. + */ +SP_NOINLINE static void sp_4096_sqr_98(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int64_t c; + + c = ((int64_t)a[97]) * a[97]; + r[195] = (sp_digit)(c >> 21); + c = (c & 0x1fffff) << 21; + for (k = 193; k >= 0; k--) { + for (i = 97; i >= 0; i--) { + j = k - i; + if (j >= 98 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int64_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int64_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 42; + r[k + 1] = (c >> 21) & 0x1fffff; + c = (c & 0x1fffff) << 21; + } + r[0] = (sp_digit)(c >> 21); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Calculate the bottom digit of -1/a mod 2^n. + * + * a A single precision number; only a[0] is read. + * rho Receives 2^21 minus the inverse of a[0] modulo 2^21. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x &= 0x1fffff; + + /* rho = -1/m mod b */ + *rho = (1L << 21) - x; +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar.
+ */ +SP_NOINLINE static void sp_4096_mul_d_196(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 196; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[196] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff; + for (i = 0; i < 192; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff); + } + t[1] = tb * a[193]; + r[193] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[194]; + r[194] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[195]; + r[195] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + r[196] = (sp_digit)(t[3] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A signle precision number. + */ +static void sp_4096_mont_norm_98(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. 
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<97; i++) { + r[i] = 0x1fffff; + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = 0x1fffff; + r[i + 1] = 0x1fffff; + r[i + 2] = 0x1fffff; + r[i + 3] = 0x1fffff; + r[i + 4] = 0x1fffff; + r[i + 5] = 0x1fffff; + r[i + 6] = 0x1fffff; + r[i + 7] = 0x1fffff; + } + r[96] = 0x1fffff; +#endif + r[97] = 0x7ffL; + + /* r = (2^n - 1) mod n */ + (void)sp_4096_sub_98(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_4096_cmp_98(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=97; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[97] - b[97]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[96] - b[96]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 88; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. 
+ * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_4096_cond_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[96] = a[96] - (b[96] & m); + r[97] = a[97] - (b[97] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 98; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[98] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + for (i = 0; i < 96; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + } + t[1] = tb * a[97]; r[97] += (t[0] >> 21) + (t[1] & 0x1fffff); + r[98] += t[1] >> 21; +#endif 
/* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 21. + * + * a Array of sp_digit to normalize. + */ +static void sp_4096_norm_98(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 97; i++) { + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + } +#else + int i; + for (i = 0; i < 96; i += 8) { + a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff; + a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff; + a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff; + a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff; + a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff; + a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff; + a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff; + a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff; + a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff; + } + a[96+1] += a[96] >> 21; + a[96] &= 0x1fffff; +#endif +} + +/* Shift the result in the high 2048 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_4096_mont_shift_98(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int64_t n = a[97] >> 11; + n += ((int64_t)a[98]) << 10; + + for (i = 0; i < 97; i++) { + r[i] = n & 0x1fffff; + n >>= 21; + n += ((int64_t)a[99 + i]) << 10; + } + r[97] = (sp_digit)n; +#else + int i; + int64_t n = a[97] >> 11; + n += ((int64_t)a[98]) << 10; + for (i = 0; i < 96; i += 8) { + r[i + 0] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 99]) << 10; + r[i + 1] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 100]) << 10; + r[i + 2] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 101]) << 10; + r[i + 3] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 102]) << 10; + r[i + 4] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 103]) << 10; + r[i + 5] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 104]) << 10; + r[i + 6] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 105]) << 10; + r[i + 7] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 106]) << 10; + } + r[96] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[195]) << 10; + r[97] = 
(sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[98], 0, sizeof(*r) * 98U); +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + + for (i=0; i<97; i++) { + mu = (a[i] * mp) & 0x1fffff; + sp_4096_mul_add_98(a+i, m, mu); + a[i+1] += a[i] >> 21; + } + mu = (a[i] * mp) & 0x7ffL; + sp_4096_mul_add_98(a+i, m, mu); + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + + sp_4096_mont_shift_98(a, a); + sp_4096_cond_sub_98(a, a, m, 0 - (((a[97] >> 11) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_4096_norm_98(a); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_98(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_98(r, a, b); + sp_4096_mont_reduce_98(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_98(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_98(r, a); + sp_4096_mont_reduce_98(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_d_98(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 98; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[98] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff; + for (i = 0; i < 96; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff); + } + t[1] = tb * a[97]; + r[97] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + r[98] = (sp_digit)(t[1] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_4096_cond_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[96] = a[96] + (b[96] & m); + r[97] = a[97] + (b[97] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_98(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 98; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +SP_NOINLINE static void sp_4096_rshift_98(sp_digit* r, sp_digit* a, byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<97; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff; + } +#else + for (i=0; i<96; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff; + } + r[96] = ((a[96] >> n) | (a[97] << (21 - n))) & 0x1fffff; +#endif + r[97] = a[97] >> n; +} + +#ifdef WOLFSSL_SP_DIV_32 +static WC_INLINE sp_digit sp_4096_div_word_98(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 21 bits from d1 and top 10 bits from d0. */ + d = (d1 << 10) | (d0 >> 11); + r = d / dv; + d -= r * dv; + /* Up to 11 bits in r */ + /* Next 10 bits from d0. */ + r <<= 10; + d <<= 10; + d |= (d0 >> 1) & ((1 << 10) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 21 bits in r */ + /* Remaining 1 bits from d0. */ + r <<= 1; + d <<= 1; + d |= d0 & ((1 << 1) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_32 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. 
+ */ +static int sp_4096_div_98(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* td; +#else + sp_digit t1d[196 + 1], t2d[98 + 1], sdd[98 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 98 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = td; + t2 = td + 196 + 1; + sd = t2 + 98 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_98(sd, d, 1L << 10); + sp_4096_mul_d_196(t1, a, 1L << 10); + dv = sd[97]; + for (i=98; i>=0; i--) { + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[98 + i]; + d1 <<= 21; + d1 += t1[98 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_98(t1[98 + i], t1[98 + i - 1], dv); +#endif + + sp_4096_mul_d_98(t2, sd, r1); + (void)sp_4096_sub_98(&t1[i], &t1[i], t2); + t1[98 + i] -= t2[98]; + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; + r1 = (((-t1[98 + i]) << 21) - t1[98 + i - 1]) / dv; + r1 -= t1[98 + i]; + sp_4096_mul_d_98(t2, sd, r1); + (void)sp_4096_add_98(&t1[i], &t1[i], t2); + t1[98 + i] += t1[98 + i - 1] >> 21; + t1[98 + i - 1] &= 0x1fffff; + } + t1[98 - 1] += t1[98 - 2] >> 21; + t1[98 - 2] &= 0x1fffff; + d1 = t1[98 - 1]; + r1 = (sp_digit)(d1 / dv); + + sp_4096_mul_d_98(t2, sd, r1); + sp_4096_sub_98(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 98U); + for (i=0; i<96; i++) { + r[i+1] += r[i] >> 21; + r[i] &= 0x1fffff; + } + sp_4096_cond_add_98(r, r, sd, 0 - ((r[97] < 0) ? 
+ (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_98(r); + sp_4096_rshift_98(r, r, 10); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_98(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_98(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 98U * 2U); + + norm = t[0] = td; + t[1] = &td[98 * 2]; + t[2] = &td[2 * 98 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 98U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 98 * 2); + sp_4096_mont_sqr_98(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 98 * 2); + } + + sp_4096_mont_reduce_98(t[0], m, mp); + n = sp_4096_cmp_98(t[0], m); + sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 98 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][196]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 98 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[98 * 2]; + t[2] = &td[2 * 98 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + else { + sp_4096_mul_98(t[1], a, norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_98(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_98(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_98(t[0], m, mp); + n = sp_4096_cmp_98(t[0], m); + sp_4096_cond_sub_98(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][196]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[196]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 196, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 196; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_98(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_98(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_98(t[1], t[1], norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + else { + sp_4096_mul_98(t[1], a, norm); + err = sp_4096_mod_98(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_98(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_98(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_98(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_98(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_98(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_98(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_98(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_98(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_98(t[10], t[ 5], m, mp); + sp_4096_mont_mul_98(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_98(t[12], t[ 6], m, mp); + sp_4096_mont_mul_98(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_98(t[14], t[ 7], m, mp); + sp_4096_mont_mul_98(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_98(t[16], t[ 8], m, mp); + sp_4096_mont_mul_98(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_98(t[18], t[ 9], m, mp); + sp_4096_mont_mul_98(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_98(t[20], t[10], m, mp); + sp_4096_mont_mul_98(t[21], t[11], t[10], 
m, mp); + sp_4096_mont_sqr_98(t[22], t[11], m, mp); + sp_4096_mont_mul_98(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_98(t[24], t[12], m, mp); + sp_4096_mont_mul_98(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_98(t[26], t[13], m, mp); + sp_4096_mont_mul_98(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_98(t[28], t[14], m, mp); + sp_4096_mont_mul_98(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_98(t[30], t[15], m, mp); + sp_4096_mont_mul_98(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 98) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + sp_4096_mont_sqr_98(rt, rt, m, mp); + + sp_4096_mont_mul_98(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_98(rt, m, mp); + n = sp_4096_cmp_98(rt, m); + sp_4096_cond_sub_98(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A signle precision number. + */ +static void sp_4096_mont_norm_196(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. 
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<195; i++) { + r[i] = 0x1fffff; + } +#else + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = 0x1fffff; + r[i + 1] = 0x1fffff; + r[i + 2] = 0x1fffff; + r[i + 3] = 0x1fffff; + r[i + 4] = 0x1fffff; + r[i + 5] = 0x1fffff; + r[i + 6] = 0x1fffff; + r[i + 7] = 0x1fffff; + } + r[192] = 0x1fffff; + r[193] = 0x1fffff; + r[194] = 0x1fffff; +#endif + r[195] = 0x1L; + + /* r = (2^n - 1) mod n */ + (void)sp_4096_sub_196(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_4096_cmp_196(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=195; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[195] - b[195]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[194] - b[194]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[193] - b[193]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[192] - b[192]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 184; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_4096_cond_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[192] = a[192] - (b[192] & m); + r[193] = a[193] - (b[193] & m); + r[194] = a[194] - (b[194] & m); + r[195] = a[195] - (b[195] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 196; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[196] += t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + for (i = 0; i < 192; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + } + t[1] = tb * a[193]; r[193] += (t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[194]; r[194] += (t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[195]; r[195] += (t[2] >> 21) + (t[3] & 0x1fffff); + r[196] += t[3] >> 21; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 21. + * + * a Array of sp_digit to normalize. 
+ */ +static void sp_4096_norm_196(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 195; i++) { + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + } +#else + int i; + for (i = 0; i < 192; i += 8) { + a[i+1] += a[i+0] >> 21; a[i+0] &= 0x1fffff; + a[i+2] += a[i+1] >> 21; a[i+1] &= 0x1fffff; + a[i+3] += a[i+2] >> 21; a[i+2] &= 0x1fffff; + a[i+4] += a[i+3] >> 21; a[i+3] &= 0x1fffff; + a[i+5] += a[i+4] >> 21; a[i+4] &= 0x1fffff; + a[i+6] += a[i+5] >> 21; a[i+5] &= 0x1fffff; + a[i+7] += a[i+6] >> 21; a[i+6] &= 0x1fffff; + a[i+8] += a[i+7] >> 21; a[i+7] &= 0x1fffff; + a[i+9] += a[i+8] >> 21; a[i+8] &= 0x1fffff; + } + a[192+1] += a[192] >> 21; + a[192] &= 0x1fffff; + a[193+1] += a[193] >> 21; + a[193] &= 0x1fffff; + a[194+1] += a[194] >> 21; + a[194] &= 0x1fffff; +#endif +} + +/* Shift the result in the high 4096 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. + */ +static void sp_4096_mont_shift_196(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int64_t n = a[195] >> 1; + n += ((int64_t)a[196]) << 20; + + for (i = 0; i < 195; i++) { + r[i] = n & 0x1fffff; + n >>= 21; + n += ((int64_t)a[197 + i]) << 20; + } + r[195] = (sp_digit)n; +#else + int i; + int64_t n = a[195] >> 1; + n += ((int64_t)a[196]) << 20; + for (i = 0; i < 192; i += 8) { + r[i + 0] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 197]) << 20; + r[i + 1] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 198]) << 20; + r[i + 2] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 199]) << 20; + r[i + 3] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 200]) << 20; + r[i + 4] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 201]) << 20; + r[i + 5] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 202]) << 20; + r[i + 6] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 203]) << 20; + r[i + 7] = n & 0x1fffff; + n >>= 21; n += ((int64_t)a[i + 204]) << 20; + } + r[192] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[389]) << 20; + r[193] = n 
& 0x1fffff; n >>= 21; n += ((int64_t)a[390]) << 20; + r[194] = n & 0x1fffff; n >>= 21; n += ((int64_t)a[391]) << 20; + r[195] = (sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[196], 0, sizeof(*r) * 196U); +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + +#ifdef WOLFSSL_SP_DH + if (mp != 1) { + for (i=0; i<195; i++) { + mu = (a[i] * mp) & 0x1fffff; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + } + mu = (a[i] * mp) & 0x1L; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + } + else { + for (i=0; i<195; i++) { + mu = a[i] & 0x1fffff; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + } + mu = a[i] & 0x1L; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; + } +#else + for (i=0; i<195; i++) { + mu = (a[i] * mp) & 0x1fffff; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + } + mu = (a[i] * mp) & 0x1L; + sp_4096_mul_add_196(a+i, m, mu); + a[i+1] += a[i] >> 21; + a[i] &= 0x1fffff; +#endif + + sp_4096_mont_shift_196(a, a); + sp_4096_cond_sub_196(a, a, m, 0 - (((a[195] >> 1) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_4096_norm_196(a); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_196(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_196(r, a, b); + sp_4096_mont_reduce_196(r, m, mp); +} + +/* Square the Montgomery form number. 
(r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_196(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_196(r, a); + sp_4096_mont_reduce_196(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_d_392(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 392; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffff; + t >>= 21; + } + r[392] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffff; + for (i = 0; i < 392; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 21) + (t[1] & 0x1fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 21) + (t[2] & 0x1fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 21) + (t[3] & 0x1fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 21) + (t[4] & 0x1fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 21) + (t[5] & 0x1fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 21) + (t[6] & 0x1fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 21) + (t[7] & 0x1fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 21) + (t[0] & 0x1fffff); + } + r[392] = (sp_digit)(t[7] >> 21); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_4096_cond_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 192; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[192] = a[192] + (b[192] & m); + r[193] = a[193] + (b[193] & m); + r[194] = a[194] + (b[194] & m); + r[195] = a[195] + (b[195] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_196(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 196; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +SP_NOINLINE static void sp_4096_rshift_196(sp_digit* r, sp_digit* a, byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<195; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (21 - n))) & 0x1fffff; + } +#else + for (i=0; i<192; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (21 - n))) & 0x1fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (21 - n))) & 0x1fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (21 - n))) & 0x1fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (21 - n))) & 0x1fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (21 - n))) & 0x1fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (21 - n))) & 0x1fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (21 - n))) & 0x1fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (21 - n))) & 0x1fffff; + } + r[192] = ((a[192] >> n) | (a[193] << (21 - n))) & 0x1fffff; + r[193] = ((a[193] >> n) | (a[194] << (21 - n))) & 0x1fffff; + r[194] = ((a[194] >> n) | (a[195] << (21 - n))) & 0x1fffff; +#endif + r[195] = a[195] >> n; +} + +#ifdef WOLFSSL_SP_DIV_32 +static WC_INLINE sp_digit sp_4096_div_word_196(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t; + + /* All 21 bits from d1 and top 10 bits from d0. */ + d = (d1 << 10) | (d0 >> 11); + r = d / dv; + d -= r * dv; + /* Up to 11 bits in r */ + /* Next 10 bits from d0. */ + r <<= 10; + d <<= 10; + d |= (d0 >> 1) & ((1 << 10) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 21 bits in r */ + /* Remaining 1 bits from d0. */ + r <<= 1; + d <<= 1; + d |= d0 & ((1 << 1) - 1); + t = d / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_32 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. 
+ * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_div_196(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_32 + int64_t d1; +#endif + sp_digit dv, r1; +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* td; +#else + sp_digit t1d[392 + 1], t2d[196 + 1], sdd[196 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 196 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = td; + t2 = td + 392 + 1; + sd = t2 + 196 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_196(sd, d, 1L << 20); + sp_4096_mul_d_392(t1, a, 1L << 20); + dv = sd[195]; + for (i=196; i>=0; i--) { + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; +#ifndef WOLFSSL_SP_DIV_32 + d1 = t1[196 + i]; + d1 <<= 21; + d1 += t1[196 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_196(t1[196 + i], t1[196 + i - 1], dv); +#endif + + sp_4096_mul_d_196(t2, sd, r1); + (void)sp_4096_sub_196(&t1[i], &t1[i], t2); + t1[196 + i] -= t2[196]; + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; + r1 = (((-t1[196 + i]) << 21) - t1[196 + i - 1]) / dv; + r1 -= t1[196 + i]; + sp_4096_mul_d_196(t2, sd, r1); + (void)sp_4096_add_196(&t1[i], &t1[i], t2); + t1[196 + i] += t1[196 + i - 1] >> 21; + t1[196 + i - 1] &= 0x1fffff; + } + t1[196 - 1] += t1[196 - 2] >> 21; + t1[196 - 2] &= 0x1fffff; + d1 = t1[196 - 1]; + r1 = (sp_digit)(d1 / dv); + + sp_4096_mul_d_196(t2, sd, r1); + sp_4096_sub_196(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 196U); + for (i=0; i<194; i++) { + r[i+1] += r[i] >> 21; + r[i] &= 0x1fffff; + } + 
sp_4096_cond_add_196(r, r, sd, 0 - ((r[195] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_196(r); + sp_4096_rshift_196(r, r, 20); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_196(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_196(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 196U * 2U); + + norm = t[0] = td; + t[1] = &td[196 * 2]; + t[2] = &td[2 * 196 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 196U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 196 * 2); + sp_4096_mont_sqr_196(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 196 * 2); + } + + sp_4096_mont_reduce_196(t[0], m, mp); + n = sp_4096_cmp_196(t[0], m); + sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 196 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][392]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 196 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[196 * 2]; + t[2] = &td[2 * 196 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + else { + sp_4096_mul_196(t[1], a, norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 21; + c = bits % 21; + n = e[i--] << (21 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 21; + } + + y = (n >> 20) & 1; + n <<= 1; + + sp_4096_mont_mul_196(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_196(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_196(t[0], m, mp); + n = sp_4096_cmp_196(t[0], m); + sp_4096_cond_sub_196(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][392]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[392]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 392, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 392; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_196(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_196(t[1], t[1], norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + else { + sp_4096_mul_196(t[1], a, norm); + err = sp_4096_mod_196(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_196(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_196(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_196(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_196(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_196(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_196(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_196(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_196(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_196(t[10], t[ 5], m, mp); + sp_4096_mont_mul_196(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_196(t[12], t[ 6], m, mp); + sp_4096_mont_mul_196(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_196(t[14], t[ 7], m, mp); + sp_4096_mont_mul_196(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_196(t[16], t[ 8], m, mp); + sp_4096_mont_mul_196(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_196(t[18], t[ 9], m, mp); + sp_4096_mont_mul_196(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_196(t[20], t[10], m, mp); + 
sp_4096_mont_mul_196(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_196(t[22], t[11], m, mp); + sp_4096_mont_mul_196(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_196(t[24], t[12], m, mp); + sp_4096_mont_mul_196(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_196(t[26], t[13], m, mp); + sp_4096_mont_mul_196(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_196(t[28], t[14], m, mp); + sp_4096_mont_mul_196(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_196(t[30], t[15], m, mp); + sp_4096_mont_mul_196(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 196) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + sp_4096_mont_sqr_196(rt, rt, m, mp); + + sp_4096_mont_mul_196(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_196(rt, m, mp); + n = sp_4096_cmp_196(rt, m); + sp_4096_cond_sub_196(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_4096_mask_98(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<98; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } + r[96] = a[96] & m; + r[97] = a[97] & m; +#endif +} + +#endif +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* d = NULL; + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit* norm; + sp_digit e[1] = {0}; + sp_digit mp; + int i; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + + if (err == MP_OKAY) { + if (mp_count_bits(em) > 21) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 196 * 2; + m = r + 196 * 2; + norm = r; + + sp_4096_from_bin(a, 196, in, inLen); +#if DIGIT_BIT >= 21 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + + if (err == 
MP_OKAY) { + sp_4096_from_mp(m, 196, mm); + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + } + if (err == MP_OKAY) { + sp_4096_mul_196(a, a, norm); + err = sp_4096_mod_196(a, a, m); + } + if (err == MP_OKAY) { + for (i=20; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 196 * 2); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_196(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_196(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_196(r, m, mp); + mp = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0)- 1); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[392], md[196], rd[392]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit* m; + sp_digit* r; + sp_digit e[1] = {0}; + int err = MP_OKAY; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(em) > 21) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + a = d; + r = a + 196 * 2; + m = r + 196 * 2; + } +#else + a = ad; + m = md; + r = rd; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); +#if DIGIT_BIT >= 21 + e[0] = (sp_digit)em->dp[0]; +#else + e[0] = (sp_digit)em->dp[0]; + if (em->used > 1) { + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; + } +#endif + if (e[0] == 0) { + err = MP_EXPTMOD_E; + } + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 196, mm); + + if (e[0] == 0x3) { + sp_4096_sqr_196(r, a); + err = sp_4096_mod_196(r, r, m); + if (err == 
MP_OKAY) { + sp_4096_mul_196(r, a, r); + err = sp_4096_mod_196(r, r, m); + } + } + else { + sp_digit* norm = r; + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + sp_4096_mul_196(a, a, norm); + err = sp_4096_mod_196(a, a, m); + + if (err == MP_OKAY) { + for (i=20; i>=0; i--) { + if ((e[0] >> i) != 0) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 392U); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_196(r, r, m, mp); + + if (((e[0] >> i) & 1) == 1) { + sp_4096_mont_mul_196(r, r, a, m, mp); + } + } + sp_4096_mont_reduce_196(r, m, mp); + mp = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((mp < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) { + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } +#endif + + return err; +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 196 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 196; + m = a + 196; + r = a; + + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(d, 196, dm); + sp_4096_from_mp(m, 196, mm); + err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0); + } + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 196); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[392], d[196], m[196]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(d, 196, dm); + sp_4096_from_mp(m, 196, mm); + err = sp_4096_mod_exp_196(r, a, d, 4096, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 196); + + return err; +#endif /* WOLFSSL_SP_SMALL || 
defined(WOLFSSL_SMALL_STACK) */ +#else +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 98 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 196 * 2; + q = p + 98; + qi = dq = dp = q + 98; + tmpa = qi + 98; + tmpb = tmpa + 196; + + tmp = t; + r = tmp + 196; + + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(p, 98, pm); + sp_4096_from_mp(q, 98, qm); + sp_4096_from_mp(dp, 98, dpm); + err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 98, dqm); + err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1); + } + if (err == MP_OKAY) { + (void)sp_4096_sub_98(tmpa, tmpa, tmpb); + sp_4096_mask_98(tmp, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + (void)sp_4096_add_98(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 98, qim); + sp_4096_mul_98(tmpa, tmpa, qi); + err = sp_4096_mod_98(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_98(tmpa, q, tmpa); + (void)sp_4096_add_196(r, tmpb, tmpa); + sp_4096_norm_196(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 98 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[196 * 2]; + sp_digit p[98], q[98], dp[98], dq[98], qi[98]; + sp_digit tmp[196], tmpa[196], tmpb[196]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512U) { + err = MP_READ_E; + 
} + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 196, in, inLen); + sp_4096_from_mp(p, 98, pm); + sp_4096_from_mp(q, 98, qm); + sp_4096_from_mp(dp, 98, dpm); + sp_4096_from_mp(dq, 98, dqm); + sp_4096_from_mp(qi, 98, qim); + + err = sp_4096_mod_exp_98(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + err = sp_4096_mod_exp_98(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_4096_sub_98(tmpa, tmpa, tmpb); + sp_4096_mask_98(tmp, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + (void)sp_4096_add_98(tmpa, tmpa, tmp); + sp_4096_mul_98(tmpa, tmpa, qi); + err = sp_4096_mod_98(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_98(tmpa, tmpa, q); + (void)sp_4096_add_196(r, tmpb, tmpa); + sp_4096_norm_196(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 21 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 196); + r->used = 196; + mp_clamp(r); +#elif DIGIT_BIT < 21 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 196; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 21) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 21 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 196; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 21 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 21 - s; + } + else { + s += 21; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
+ */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + + sp_4096_from_mp(b, 196, base); + sp_4096_from_mp(e, 196, exp); + sp_4096_from_mp(m, 196, mod); + + err = sp_4096_mod_exp_196(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[392], ed[196], md[196]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 196, base); + sp_4096_from_mp(e, 196, exp); + sp_4096_from_mp(m, 196, mod); + + err = sp_4096_mod_exp_196(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + 
err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_DH); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +SP_NOINLINE static void sp_4096_lshift_196(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[196] = a[195] >> (21 - n); + for (i=195; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (21 - n))) & 0x1fffff; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[195]; + r[196] = s >> (21U - n); + s = (sp_int_digit)(a[195]); t = (sp_int_digit)(a[194]); + r[195] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[194]); t = (sp_int_digit)(a[193]); + r[194] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[193]); t = (sp_int_digit)(a[192]); + r[193] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[192]); t = (sp_int_digit)(a[191]); + r[192] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[191]); t = (sp_int_digit)(a[190]); + r[191] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[190]); t = (sp_int_digit)(a[189]); + r[190] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[189]); t = (sp_int_digit)(a[188]); + r[189] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[188]); t = (sp_int_digit)(a[187]); + r[188] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[187]); t = (sp_int_digit)(a[186]); + r[187] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[186]); t = (sp_int_digit)(a[185]); + r[186] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[185]); t = (sp_int_digit)(a[184]); + r[185] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[184]); t = (sp_int_digit)(a[183]); + r[184] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[183]); t = (sp_int_digit)(a[182]); + r[183] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[182]); t = (sp_int_digit)(a[181]); + r[182] = ((s << n) | (t >> 
(21U - n))) & ; + s = (sp_int_digit)(a[181]); t = (sp_int_digit)(a[180]); + r[181] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[180]); t = (sp_int_digit)(a[179]); + r[180] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[179]); t = (sp_int_digit)(a[178]); + r[179] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[178]); t = (sp_int_digit)(a[177]); + r[178] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[177]); t = (sp_int_digit)(a[176]); + r[177] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[176]); t = (sp_int_digit)(a[175]); + r[176] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[175]); t = (sp_int_digit)(a[174]); + r[175] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[174]); t = (sp_int_digit)(a[173]); + r[174] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[173]); t = (sp_int_digit)(a[172]); + r[173] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[172]); t = (sp_int_digit)(a[171]); + r[172] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[171]); t = (sp_int_digit)(a[170]); + r[171] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[170]); t = (sp_int_digit)(a[169]); + r[170] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[169]); t = (sp_int_digit)(a[168]); + r[169] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[168]); t = (sp_int_digit)(a[167]); + r[168] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[167]); t = (sp_int_digit)(a[166]); + r[167] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[166]); t = (sp_int_digit)(a[165]); + r[166] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[165]); t = (sp_int_digit)(a[164]); + r[165] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[164]); t = (sp_int_digit)(a[163]); + r[164] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[163]); t = (sp_int_digit)(a[162]); + r[163] = ((s << n) | (t >> (21U - n))) & ; + s = 
(sp_int_digit)(a[162]); t = (sp_int_digit)(a[161]); + r[162] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[161]); t = (sp_int_digit)(a[160]); + r[161] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[160]); t = (sp_int_digit)(a[159]); + r[160] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[159]); t = (sp_int_digit)(a[158]); + r[159] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[158]); t = (sp_int_digit)(a[157]); + r[158] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[157]); t = (sp_int_digit)(a[156]); + r[157] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[156]); t = (sp_int_digit)(a[155]); + r[156] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[155]); t = (sp_int_digit)(a[154]); + r[155] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[154]); t = (sp_int_digit)(a[153]); + r[154] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[153]); t = (sp_int_digit)(a[152]); + r[153] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[152]); t = (sp_int_digit)(a[151]); + r[152] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[151]); t = (sp_int_digit)(a[150]); + r[151] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[150]); t = (sp_int_digit)(a[149]); + r[150] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[149]); t = (sp_int_digit)(a[148]); + r[149] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[148]); t = (sp_int_digit)(a[147]); + r[148] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[147]); t = (sp_int_digit)(a[146]); + r[147] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[146]); t = (sp_int_digit)(a[145]); + r[146] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[145]); t = (sp_int_digit)(a[144]); + r[145] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[144]); t = (sp_int_digit)(a[143]); + r[144] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[143]); t = 
(sp_int_digit)(a[142]); + r[143] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[142]); t = (sp_int_digit)(a[141]); + r[142] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[141]); t = (sp_int_digit)(a[140]); + r[141] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[140]); t = (sp_int_digit)(a[139]); + r[140] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[139]); t = (sp_int_digit)(a[138]); + r[139] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[138]); t = (sp_int_digit)(a[137]); + r[138] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[137]); t = (sp_int_digit)(a[136]); + r[137] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[136]); t = (sp_int_digit)(a[135]); + r[136] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[135]); t = (sp_int_digit)(a[134]); + r[135] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[134]); t = (sp_int_digit)(a[133]); + r[134] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[133]); t = (sp_int_digit)(a[132]); + r[133] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[132]); t = (sp_int_digit)(a[131]); + r[132] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[131]); t = (sp_int_digit)(a[130]); + r[131] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[130]); t = (sp_int_digit)(a[129]); + r[130] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[129]); t = (sp_int_digit)(a[128]); + r[129] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[128]); t = (sp_int_digit)(a[127]); + r[128] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[127]); t = (sp_int_digit)(a[126]); + r[127] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[126]); t = (sp_int_digit)(a[125]); + r[126] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[125]); t = (sp_int_digit)(a[124]); + r[125] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[124]); t = (sp_int_digit)(a[123]); + r[124] = ((s << 
n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[123]); t = (sp_int_digit)(a[122]); + r[123] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[122]); t = (sp_int_digit)(a[121]); + r[122] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[121]); t = (sp_int_digit)(a[120]); + r[121] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[120]); t = (sp_int_digit)(a[119]); + r[120] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[119]); t = (sp_int_digit)(a[118]); + r[119] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[118]); t = (sp_int_digit)(a[117]); + r[118] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[117]); t = (sp_int_digit)(a[116]); + r[117] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[116]); t = (sp_int_digit)(a[115]); + r[116] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[115]); t = (sp_int_digit)(a[114]); + r[115] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[114]); t = (sp_int_digit)(a[113]); + r[114] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[113]); t = (sp_int_digit)(a[112]); + r[113] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[112]); t = (sp_int_digit)(a[111]); + r[112] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[111]); t = (sp_int_digit)(a[110]); + r[111] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[110]); t = (sp_int_digit)(a[109]); + r[110] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[109]); t = (sp_int_digit)(a[108]); + r[109] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[108]); t = (sp_int_digit)(a[107]); + r[108] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[107]); t = (sp_int_digit)(a[106]); + r[107] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[106]); t = (sp_int_digit)(a[105]); + r[106] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[105]); t = (sp_int_digit)(a[104]); + r[105] = ((s << n) | (t >> (21U - n))) & ; + s = 
(sp_int_digit)(a[104]); t = (sp_int_digit)(a[103]); + r[104] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[103]); t = (sp_int_digit)(a[102]); + r[103] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[102]); t = (sp_int_digit)(a[101]); + r[102] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[101]); t = (sp_int_digit)(a[100]); + r[101] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[100]); t = (sp_int_digit)(a[99]); + r[100] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[99]); t = (sp_int_digit)(a[98]); + r[99] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[98]); t = (sp_int_digit)(a[97]); + r[98] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[97]); t = (sp_int_digit)(a[96]); + r[97] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[96]); t = (sp_int_digit)(a[95]); + r[96] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[95]); t = (sp_int_digit)(a[94]); + r[95] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[94]); t = (sp_int_digit)(a[93]); + r[94] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[93]); t = (sp_int_digit)(a[92]); + r[93] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[92]); t = (sp_int_digit)(a[91]); + r[92] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[91]); t = (sp_int_digit)(a[90]); + r[91] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[90]); t = (sp_int_digit)(a[89]); + r[90] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[89]); t = (sp_int_digit)(a[88]); + r[89] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[88]); t = (sp_int_digit)(a[87]); + r[88] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[87]); t = (sp_int_digit)(a[86]); + r[87] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[86]); t = (sp_int_digit)(a[85]); + r[86] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[85]); t = (sp_int_digit)(a[84]); + r[85] = ((s << n) | (t >> (21U - 
n))) & ; + s = (sp_int_digit)(a[84]); t = (sp_int_digit)(a[83]); + r[84] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[83]); t = (sp_int_digit)(a[82]); + r[83] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[82]); t = (sp_int_digit)(a[81]); + r[82] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[81]); t = (sp_int_digit)(a[80]); + r[81] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[80]); t = (sp_int_digit)(a[79]); + r[80] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[79]); t = (sp_int_digit)(a[78]); + r[79] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[78]); t = (sp_int_digit)(a[77]); + r[78] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (21U - 
n))) & ; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (21U - 
n))) & ; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (21U - 
n))) & ; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (21U - n))) & ; + s = 
(sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (21U - n))) & ; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (21U - n))) & ; +#endif + r[0] = (a[0] << n) & 0x1fffff; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_196(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[392]; + sp_digit td[197]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 589, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 392; +#else + norm = nd; + tmp = td; +#endif + + XMEMSET(td, 0, sizeof(td)); + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_196(norm, m); + + bits = ((bits + 3) / 4) * 4; + i = ((bits + 20) / 21) - 1; + c = bits % 21; + if (c == 0) { + c = 21; + } + if (i < 196) { + n = e[i--] << (32 - c); + } + else { + n = 0; + i--; + } + if (c < 4) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + sp_4096_lshift_196(r, norm, y); + for (; i>=0 || c>=4; ) { + if (c < 4) { + n |= e[i--] << (11 - c); + c += 21; + } + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + + sp_4096_mont_sqr_196(r, r, m, mp); + 
sp_4096_mont_sqr_196(r, r, m, mp); + sp_4096_mont_sqr_196(r, r, m, mp); + sp_4096_mont_sqr_196(r, r, m, mp); + + sp_4096_lshift_196(r, r, y); + sp_4096_mul_d_196(tmp, norm, (r[196] << 20) + (r[195] >> 1)); + r[196] = 0; + r[195] &= 0x1L; + (void)sp_4096_add_196(r, r, tmp); + sp_4096_norm_196(r); + o = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_4096_mont_reduce_196(r, m, mp); + n = sp_4096_cmp_196(r, m); + sp_4096_cond_sub_196(r, r, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
+ */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e + 196; + r = b; + + sp_4096_from_mp(b, 196, base); + sp_4096_from_bin(e, 196, exp, expLen); + sp_4096_from_mp(m, 196, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && + ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) { + err = sp_4096_mod_exp_2_196(r, e, expLen * 8, m); + } + else + #endif + err = sp_4096_mod_exp_196(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[392], ed[196], md[196]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 196 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 196 * 2; + m = e 
+ 196; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 196, base); + sp_4096_from_bin(e, 196, exp, expLen); + sp_4096_from_mp(m, 196, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2U && + ((m[195] << 15) | (m[194] >> 6)) == 0xffffL) { + err = sp_4096_mod_exp_2_196(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_4096_mod_exp_196(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_4096 + } + #endif + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + XMEMSET(e, 0, sizeof(sp_digit) * 196U); + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_DH); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 1bd086eb9..a498483b8 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -7994,6 +7994,4265 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a Byte array. + * n Number of bytes in array to read. 
+ */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 45U) { + r[j] &= 0x1fffffffffffffL; + s = 53U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Maximum number of bytes to convert + * a A multi-precision integer. + */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 53 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 53 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0x1fffffffffffffL; + s = 53U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 53U) <= (word32)DIGIT_BIT) { + s += 53U; + r[j] &= 0x1fffffffffffffL; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 53) { + r[j] &= 0x1fffffffffffffL; + if (j + 1 >= size) { + break; + } + s = 53 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte 
array.
+ * Fixed length number of bytes written: 512
+ *
+ * Carries are first propagated through r so that digits 0..76 hold 53 bits
+ * each; r is therefore modified. Bytes are then written from a[511] (least
+ * significant) down towards a[0].
+ *
+ * r  A single precision integer.
+ * a  Byte array.
+ */
+static void sp_4096_to_bin(sp_digit* r, byte* a)
+{
+    int i, j, s = 0, b;
+
+    for (i=0; i<77; i++) {
+        r[i+1] += r[i] >> 53; /* push the carry into the next digit */
+        r[i] &= 0x1fffffffffffffL;
+    }
+    j = 4096 / 8 - 1; /* index of the last (least significant) output byte */
+    a[j] = 0;
+    for (i=0; i<78 && j>=0; i++) {
+        b = 0; /* number of bits of r[i] emitted so far */
+        /* lint allow cast of mismatch sp_digit and int */
+        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        if (j < 0) {
+            break;
+        }
+        while (b < 53) {
+            a[j--] = r[i] >> b; b += 8;
+            if (j < 0) {
+                break;
+            }
+        }
+        s = 8 - (b - 53); /* bit offset at which the next digit continues */
+        if (j >= 0) {
+            a[j] = 0;
+        }
+        if (s != 0) {
+            j++; /* step back to the byte the next digit is OR-ed into */
+        }
+    }
+}
+
+#ifndef WOLFSSL_SP_SMALL
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Both operands are read as 13 digits; 26 digits, r[0]..r[25], are written.
+ * Every column sum is accumulated in a 128-bit integer before any digit of r
+ * is stored, then carries are propagated 53 bits at a time.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ */
+SP_NOINLINE static void sp_4096_mul_13(sp_digit* r, const sp_digit* a,
+    const sp_digit* b)
+{
+    int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
+    int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
+                 + ((int128_t)a[ 1]) * b[ 0];
+    int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
+                 + ((int128_t)a[ 1]) * b[ 1]
+                 + ((int128_t)a[ 2]) * b[ 0];
+    int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
+                 + ((int128_t)a[ 1]) * b[ 2]
+                 + ((int128_t)a[ 2]) * b[ 1]
+                 + ((int128_t)a[ 3]) * b[ 0];
+    int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
+                 + ((int128_t)a[ 1]) * b[ 3]
+                 + ((int128_t)a[ 2]) * b[ 2]
+                 + ((int128_t)a[ 3]) * b[ 1]
+                 + ((int128_t)a[ 4]) * b[ 0];
+    int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
+                 + ((int128_t)a[ 1]) * b[ 4]
+                 + ((int128_t)a[ 2]) * b[ 3]
+                 + ((int128_t)a[ 3]) * b[ 2]
+                 + ((int128_t)a[ 4]) * b[ 1]
+                 + ((int128_t)a[ 5]) * b[ 0];
+    int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
+                 + ((int128_t)a[ 1]) * b[ 5]
+                 + ((int128_t)a[ 2]) * b[ 4]
+                 + ((int128_t)a[ 3]) * b[ 3]
+                 + ((int128_t)a[ 4]) * b[ 2]
+                 + ((int128_t)a[ 5]) * b[ 1]
+                 + ((int128_t)a[ 6]) * b[ 0];
+    int128_t t7   = ((int128_t)a[ 0]) * b[ 7]
+                 + ((int128_t)a[ 1]) * b[ 6]
+                 + ((int128_t)a[ 2]) * b[ 5]
+                 + ((int128_t)a[ 3]) * b[ 4]
+                 + ((int128_t)a[ 4]) * b[ 3]
+                 + ((int128_t)a[ 5]) * b[ 2]
+                 + ((int128_t)a[ 6]) * b[ 1]
+                 + ((int128_t)a[ 7]) * b[ 0];
+    int128_t t8   = ((int128_t)a[ 0]) * b[ 8]
+                 + ((int128_t)a[ 1]) * b[ 7]
+                 + ((int128_t)a[ 2]) * b[ 6]
+                 + ((int128_t)a[ 3]) * b[ 5]
+                 + ((int128_t)a[ 4]) * b[ 4]
+                 + ((int128_t)a[ 5]) * b[ 3]
+                 + ((int128_t)a[ 6]) * b[ 2]
+                 + ((int128_t)a[ 7]) * b[ 1]
+                 + ((int128_t)a[ 8]) * b[ 0];
+    int128_t t9   = ((int128_t)a[ 0]) * b[ 9]
+                 + ((int128_t)a[ 1]) * b[ 8]
+                 + ((int128_t)a[ 2]) * b[ 7]
+                 + ((int128_t)a[ 3]) * b[ 6]
+                 + ((int128_t)a[ 4]) * b[ 5]
+                 + ((int128_t)a[ 5]) * b[ 4]
+                 + ((int128_t)a[ 6]) * b[ 3]
+                 + ((int128_t)a[ 7]) * b[ 2]
+                 + ((int128_t)a[ 8]) * b[ 1]
+                 + ((int128_t)a[ 9]) * b[ 0];
+    int128_t t10  = ((int128_t)a[ 0]) * b[10]
+                 + ((int128_t)a[ 1]) * b[ 9]
+                 + ((int128_t)a[ 2]) * b[ 8]
+                 + ((int128_t)a[ 3]) * b[ 7]
+                 + ((int128_t)a[ 4]) * b[ 6]
+                 + ((int128_t)a[ 5]) * b[ 5]
+                 + ((int128_t)a[ 6]) * b[ 4]
+                 + ((int128_t)a[ 7]) * b[ 3]
+                 + ((int128_t)a[ 8]) * b[ 2]
+                 + ((int128_t)a[ 9]) * b[ 1]
+                 + ((int128_t)a[10]) * b[ 0];
+    int128_t t11  = ((int128_t)a[ 0]) * b[11]
+                 + ((int128_t)a[ 1]) * b[10]
+                 + ((int128_t)a[ 2]) * b[ 9]
+                 + ((int128_t)a[ 3]) * b[ 8]
+                 + ((int128_t)a[ 4]) * b[ 7]
+                 + ((int128_t)a[ 5]) * b[ 6]
+                 + ((int128_t)a[ 6]) * b[ 5]
+                 + ((int128_t)a[ 7]) * b[ 4]
+                 + ((int128_t)a[ 8]) * b[ 3]
+                 + ((int128_t)a[ 9]) * b[ 2]
+                 + ((int128_t)a[10]) * b[ 1]
+                 + ((int128_t)a[11]) * b[ 0];
+    int128_t t12  = ((int128_t)a[ 0]) * b[12]
+                 + ((int128_t)a[ 1]) * b[11]
+                 + ((int128_t)a[ 2]) * b[10]
+                 + ((int128_t)a[ 3]) * b[ 9]
+                 + ((int128_t)a[ 4]) * b[ 8]
+                 + ((int128_t)a[ 5]) * b[ 7]
+                 + ((int128_t)a[ 6]) * b[ 6]
+                 + ((int128_t)a[ 7]) * b[ 5]
+                 + ((int128_t)a[ 8]) * b[ 4]
+                 + ((int128_t)a[ 9]) * b[ 3]
+                 + ((int128_t)a[10]) * b[ 2]
+                 + ((int128_t)a[11]) * b[ 1]
+                 + ((int128_t)a[12]) * b[ 0];
+    int128_t t13  = ((int128_t)a[ 1]) * b[12]
+                 + ((int128_t)a[ 2]) * b[11]
+                 + ((int128_t)a[ 3]) * b[10]
+                 + ((int128_t)a[ 4]) * b[ 9]
+                 + ((int128_t)a[ 5]) * b[ 8]
+                 + ((int128_t)a[ 6]) * b[ 7]
+                 + ((int128_t)a[ 7]) * b[ 6]
+                 + ((int128_t)a[ 8]) * b[ 5]
+                 + ((int128_t)a[ 9]) * b[ 4]
+                 + ((int128_t)a[10]) * b[ 3]
+                 + ((int128_t)a[11]) * b[ 2]
+                 + ((int128_t)a[12]) * b[ 1];
+    int128_t t14  = ((int128_t)a[ 2]) * b[12]
+                 + ((int128_t)a[ 3]) * b[11]
+                 + ((int128_t)a[ 4]) * b[10]
+                 + ((int128_t)a[ 5]) * b[ 9]
+                 + ((int128_t)a[ 6]) * b[ 8]
+                 + ((int128_t)a[ 7]) * b[ 7]
+                 + ((int128_t)a[ 8]) * b[ 6]
+                 + ((int128_t)a[ 9]) * b[ 5]
+                 + ((int128_t)a[10]) * b[ 4]
+                 + ((int128_t)a[11]) * b[ 3]
+                 + ((int128_t)a[12]) * b[ 2];
+    int128_t t15  = ((int128_t)a[ 3]) * b[12]
+                 + ((int128_t)a[ 4]) * b[11]
+                 + ((int128_t)a[ 5]) * b[10]
+                 + ((int128_t)a[ 6]) * b[ 9]
+                 + ((int128_t)a[ 7]) * b[ 8]
+                 + ((int128_t)a[ 8]) * b[ 7]
+                 + ((int128_t)a[ 9]) * b[ 6]
+                 + ((int128_t)a[10]) * b[ 5]
+                 + ((int128_t)a[11]) * b[ 4]
+                 + ((int128_t)a[12]) * b[ 3];
+    int128_t t16  = ((int128_t)a[ 4]) * b[12]
+                 + ((int128_t)a[ 5]) * b[11]
+                 + ((int128_t)a[ 6]) * b[10]
+                 + ((int128_t)a[ 7]) * b[ 9]
+                 + ((int128_t)a[ 8]) * b[ 8]
+                 + ((int128_t)a[ 9]) * b[ 7]
+                 + ((int128_t)a[10]) * b[ 6]
+                 + ((int128_t)a[11]) * b[ 5]
+                 + ((int128_t)a[12]) * b[ 4];
+    int128_t t17  = ((int128_t)a[ 5]) * b[12]
+                 + ((int128_t)a[ 6]) * b[11]
+                 + ((int128_t)a[ 7]) * b[10]
+                 + ((int128_t)a[ 8]) * b[ 9]
+                 + ((int128_t)a[ 9]) * b[ 8]
+                 + ((int128_t)a[10]) * b[ 7]
+                 + ((int128_t)a[11]) * b[ 6]
+                 + ((int128_t)a[12]) * b[ 5];
+    int128_t t18  = ((int128_t)a[ 6]) * b[12]
+                 + ((int128_t)a[ 7]) * b[11]
+                 + ((int128_t)a[ 8]) * b[10]
+                 + ((int128_t)a[ 9]) * b[ 9]
+                 + ((int128_t)a[10]) * b[ 8]
+                 + ((int128_t)a[11]) * b[ 7]
+                 + ((int128_t)a[12]) * b[ 6];
+    int128_t t19  = ((int128_t)a[ 7]) * b[12]
+                 + ((int128_t)a[ 8]) * b[11]
+                 + ((int128_t)a[ 9]) * b[10]
+                 + ((int128_t)a[10]) * b[ 9]
+                 + ((int128_t)a[11]) * b[ 8]
+                 + ((int128_t)a[12]) * b[ 7];
+    int128_t t20  = ((int128_t)a[ 8]) * b[12]
+                 + ((int128_t)a[ 9]) * b[11]
+                 + ((int128_t)a[10]) * b[10]
+                 + ((int128_t)a[11]) * b[ 9]
+                 + ((int128_t)a[12]) * b[ 8];
+    int128_t t21  = ((int128_t)a[ 9]) * b[12]
+                 + ((int128_t)a[10]) * b[11]
+                 + ((int128_t)a[11]) * b[10]
+                 + ((int128_t)a[12]) * b[ 9];
+    int128_t t22  = ((int128_t)a[10]) * b[12]
+                 + ((int128_t)a[11]) * b[11]
+                 + ((int128_t)a[12]) * b[10];
+    int128_t t23  = ((int128_t)a[11]) * b[12]
+                 + ((int128_t)a[12]) * b[11];
+    int128_t t24  = ((int128_t)a[12]) * b[12];
+
+    t1   += t0  >> 53; r[ 0] = t0  & 0x1fffffffffffffL; /* carry up, keep low 53 bits */
+    t2   += t1  >> 53; r[ 1] = t1  & 0x1fffffffffffffL;
+    t3   += t2  >> 53; r[ 2] = t2  & 0x1fffffffffffffL;
+    t4   += t3  >> 53; r[ 3] = t3  & 0x1fffffffffffffL;
+    t5   += t4  >> 53; r[ 4] = t4  & 0x1fffffffffffffL;
+    t6   += t5  >> 53; r[ 5] = t5  & 0x1fffffffffffffL;
+    t7   += t6  >> 53; r[ 6] = t6  & 0x1fffffffffffffL;
+    t8   += t7  >> 53; r[ 7] = t7  & 0x1fffffffffffffL;
+    t9   += t8  >> 53; r[ 8] = t8  & 0x1fffffffffffffL;
+    t10  += t9  >> 53; r[ 9] = t9  & 0x1fffffffffffffL;
+    t11  += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL;
+    t12  += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL;
+    t13  += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL;
+    t14  += t13 >> 53; r[13] = t13 & 0x1fffffffffffffL;
+    t15  += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL;
+    t16  += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL;
+    t17  += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL;
+    t18  += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL;
+    t19  += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL;
+    t20  += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL;
+    t21  += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL;
+    t22  += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL;
+    t23  += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL;
+    t24  += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL;
+    r[25] = (sp_digit)(t24 >> 53); /* final carry becomes the top digit */
+    r[24] = t24 & 0x1fffffffffffffL;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ */ +SP_NOINLINE static void sp_4096_sqr_13(sp_digit* r, const sp_digit* a) +{ /* tN is the 128-bit column sum for result digit N: each cross product is counted twice, a square term once. */ + int128_t t0 = ((int128_t)a[ 0]) * a[ 0]; + int128_t t1 = (((int128_t)a[ 0]) * a[ 1]) * 2; + int128_t t2 = (((int128_t)a[ 0]) * a[ 2]) * 2 + + ((int128_t)a[ 1]) * a[ 1]; + int128_t t3 = (((int128_t)a[ 0]) * a[ 3] + + ((int128_t)a[ 1]) * a[ 2]) * 2; + int128_t t4 = (((int128_t)a[ 0]) * a[ 4] + + ((int128_t)a[ 1]) * a[ 3]) * 2 + + ((int128_t)a[ 2]) * a[ 2]; + int128_t t5 = (((int128_t)a[ 0]) * a[ 5] + + ((int128_t)a[ 1]) * a[ 4] + + ((int128_t)a[ 2]) * a[ 3]) * 2; + int128_t t6 = (((int128_t)a[ 0]) * a[ 6] + + ((int128_t)a[ 1]) * a[ 5] + + ((int128_t)a[ 2]) * a[ 4]) * 2 + + ((int128_t)a[ 3]) * a[ 3]; + int128_t t7 = (((int128_t)a[ 0]) * a[ 7] + + ((int128_t)a[ 1]) * a[ 6] + + ((int128_t)a[ 2]) * a[ 5] + + ((int128_t)a[ 3]) * a[ 4]) * 2; + int128_t t8 = (((int128_t)a[ 0]) * a[ 8] + + ((int128_t)a[ 1]) * a[ 7] + + ((int128_t)a[ 2]) * a[ 6] + + ((int128_t)a[ 3]) * a[ 5]) * 2 + + ((int128_t)a[ 4]) * a[ 4]; + int128_t t9 = (((int128_t)a[ 0]) * a[ 9] + + ((int128_t)a[ 1]) * a[ 8] + + ((int128_t)a[ 2]) * a[ 7] + + ((int128_t)a[ 3]) * a[ 6] + + ((int128_t)a[ 4]) * a[ 5]) * 2; + int128_t t10 = (((int128_t)a[ 0]) * a[10] + + ((int128_t)a[ 1]) * a[ 9] + + ((int128_t)a[ 2]) * a[ 8] + + ((int128_t)a[ 3]) * a[ 7] + + ((int128_t)a[ 4]) * a[ 6]) * 2 + + ((int128_t)a[ 5]) * a[ 5]; + int128_t t11 = (((int128_t)a[ 0]) * a[11] + + ((int128_t)a[ 1]) * a[10] + + ((int128_t)a[ 2]) * a[ 9] + + ((int128_t)a[ 3]) * a[ 8] + + ((int128_t)a[ 4]) * a[ 7] + + ((int128_t)a[ 5]) * a[ 6]) * 2; + int128_t t12 = (((int128_t)a[ 0]) * a[12] + + ((int128_t)a[ 1]) * a[11] + + ((int128_t)a[ 2]) * a[10] + + ((int128_t)a[ 3]) * a[ 9] + + ((int128_t)a[ 4]) * a[ 8] + + ((int128_t)a[ 5]) * a[ 7]) * 2 + + ((int128_t)a[ 6]) * a[ 6]; + int128_t t13 = (((int128_t)a[ 1]) * a[12] + + ((int128_t)a[ 2]) * a[11] + + ((int128_t)a[ 3]) * a[10] + + ((int128_t)a[ 4]) * a[ 9] + + ((int128_t)a[ 5]) * a[ 8] + + ((int128_t)a[ 6]) * a[ 7]) * 2; + 
int128_t t14 = (((int128_t)a[ 2]) * a[12] + + ((int128_t)a[ 3]) * a[11] + + ((int128_t)a[ 4]) * a[10] + + ((int128_t)a[ 5]) * a[ 9] + + ((int128_t)a[ 6]) * a[ 8]) * 2 + + ((int128_t)a[ 7]) * a[ 7]; + int128_t t15 = (((int128_t)a[ 3]) * a[12] + + ((int128_t)a[ 4]) * a[11] + + ((int128_t)a[ 5]) * a[10] + + ((int128_t)a[ 6]) * a[ 9] + + ((int128_t)a[ 7]) * a[ 8]) * 2; + int128_t t16 = (((int128_t)a[ 4]) * a[12] + + ((int128_t)a[ 5]) * a[11] + + ((int128_t)a[ 6]) * a[10] + + ((int128_t)a[ 7]) * a[ 9]) * 2 + + ((int128_t)a[ 8]) * a[ 8]; + int128_t t17 = (((int128_t)a[ 5]) * a[12] + + ((int128_t)a[ 6]) * a[11] + + ((int128_t)a[ 7]) * a[10] + + ((int128_t)a[ 8]) * a[ 9]) * 2; + int128_t t18 = (((int128_t)a[ 6]) * a[12] + + ((int128_t)a[ 7]) * a[11] + + ((int128_t)a[ 8]) * a[10]) * 2 + + ((int128_t)a[ 9]) * a[ 9]; + int128_t t19 = (((int128_t)a[ 7]) * a[12] + + ((int128_t)a[ 8]) * a[11] + + ((int128_t)a[ 9]) * a[10]) * 2; + int128_t t20 = (((int128_t)a[ 8]) * a[12] + + ((int128_t)a[ 9]) * a[11]) * 2 + + ((int128_t)a[10]) * a[10]; + int128_t t21 = (((int128_t)a[ 9]) * a[12] + + ((int128_t)a[10]) * a[11]) * 2; + int128_t t22 = (((int128_t)a[10]) * a[12]) * 2 + + ((int128_t)a[11]) * a[11]; + int128_t t23 = (((int128_t)a[11]) * a[12]) * 2; + int128_t t24 = ((int128_t)a[12]) * a[12]; + + t1 += t0 >> 53; r[ 0] = t0 & 0x1fffffffffffffL; /* everything above the low 53 bits moves on to the next column */ + t2 += t1 >> 53; r[ 1] = t1 & 0x1fffffffffffffL; + t3 += t2 >> 53; r[ 2] = t2 & 0x1fffffffffffffL; + t4 += t3 >> 53; r[ 3] = t3 & 0x1fffffffffffffL; + t5 += t4 >> 53; r[ 4] = t4 & 0x1fffffffffffffL; + t6 += t5 >> 53; r[ 5] = t5 & 0x1fffffffffffffL; + t7 += t6 >> 53; r[ 6] = t6 & 0x1fffffffffffffL; + t8 += t7 >> 53; r[ 7] = t7 & 0x1fffffffffffffL; + t9 += t8 >> 53; r[ 8] = t8 & 0x1fffffffffffffL; + t10 += t9 >> 53; r[ 9] = t9 & 0x1fffffffffffffL; + t11 += t10 >> 53; r[10] = t10 & 0x1fffffffffffffL; + t12 += t11 >> 53; r[11] = t11 & 0x1fffffffffffffL; + t13 += t12 >> 53; r[12] = t12 & 0x1fffffffffffffL; + t14 += t13 >> 53; r[13] = t13 & 
0x1fffffffffffffL; + t15 += t14 >> 53; r[14] = t14 & 0x1fffffffffffffL; + t16 += t15 >> 53; r[15] = t15 & 0x1fffffffffffffL; + t17 += t16 >> 53; r[16] = t16 & 0x1fffffffffffffL; + t18 += t17 >> 53; r[17] = t17 & 0x1fffffffffffffL; + t19 += t18 >> 53; r[18] = t18 & 0x1fffffffffffffL; + t20 += t19 >> 53; r[19] = t19 & 0x1fffffffffffffL; + t21 += t20 >> 53; r[20] = t20 & 0x1fffffffffffffL; + t22 += t21 >> 53; r[21] = t21 & 0x1fffffffffffffL; + t23 += t22 >> 53; r[22] = t22 & 0x1fffffffffffffL; + t24 += t23 >> 53; r[23] = t23 & 0x1fffffffffffffL; + r[25] = (sp_digit)(t24 >> 53); + r[24] = t24 & 0x1fffffffffffffL; +} + +/* Add b to a into r. (r = a + b) + * Each of the 13 digits is added on its own; no carry is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_add_13(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + r[ 0] = a[ 0] + b[ 0]; + r[ 1] = a[ 1] + b[ 1]; + r[ 2] = a[ 2] + b[ 2]; + r[ 3] = a[ 3] + b[ 3]; + r[ 4] = a[ 4] + b[ 4]; + r[ 5] = a[ 5] + b[ 5]; + r[ 6] = a[ 6] + b[ 6]; + r[ 7] = a[ 7] + b[ 7]; + r[ 8] = a[ 8] + b[ 8]; + r[ 9] = a[ 9] + b[ 9]; + r[10] = a[10] + b[10]; + r[11] = a[11] + b[11]; + r[12] = a[12] + b[12]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * Each of the 26 digits is subtracted on its own; no borrow is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_sub_26(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[24] = a[24] - b[24]; + r[25] = a[25] - b[25]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_26(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 24; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[24] = a[24] + b[24]; + r[25] = a[25] + b[25]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * Each 39-digit operand is handled as three 13-digit parts; six 13-digit + * products (p0..p5) are formed and combined into r at digit offsets + * 0, 13, 26, 39 and 52. + * + * r A single precision integer; 78 digits are written. + * a A single precision integer of 39 digits. + * b A single precision integer of 39 digits. + */ +SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit p0[26]; + sp_digit p1[26]; + sp_digit p2[26]; + sp_digit p3[26]; + sp_digit p4[26]; + sp_digit p5[26]; + sp_digit t0[26]; + sp_digit t1[26]; + sp_digit t2[26]; + sp_digit a0[13]; + sp_digit a1[13]; + sp_digit a2[13]; + sp_digit b0[13]; + sp_digit b1[13]; + sp_digit b2[13]; + (void)sp_4096_add_13(a0, a, &a[13]); + (void)sp_4096_add_13(b0, b, &b[13]); + (void)sp_4096_add_13(a1, &a[13], &a[26]); + (void)sp_4096_add_13(b1, &b[13], &b[26]); + (void)sp_4096_add_13(a2, a0, &a[26]); + (void)sp_4096_add_13(b2, b0, &b[26]); + sp_4096_mul_13(p0, a, b); + sp_4096_mul_13(p2, &a[13], &b[13]); + sp_4096_mul_13(p4, &a[26], &b[26]); + sp_4096_mul_13(p1, a0, b0); + sp_4096_mul_13(p3, a1, b1); + sp_4096_mul_13(p5, a2, b2); + XMEMSET(r, 0, sizeof(*r)*2U*39U); /* a and b are not read after this point */ + (void)sp_4096_sub_26(t0, p3, p2); + (void)sp_4096_sub_26(t1, p1, p2); + (void)sp_4096_sub_26(t2, p5, t0); + (void)sp_4096_sub_26(t2, t2, t1); + (void)sp_4096_sub_26(t0, t0, p4); + (void)sp_4096_sub_26(t1, t1, p0); + (void)sp_4096_add_26(r, r, p0); + (void)sp_4096_add_26(&r[13], &r[13], t1); + (void)sp_4096_add_26(&r[26], &r[26], t2); + (void)sp_4096_add_26(&r[39], &r[39], t0); + (void)sp_4096_add_26(&r[52], &r[52], p4); +} + +/* Square a into r. (r = a * a) + * + * r A single precision integer. 
+ * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a) +{ + sp_digit p0[26]; + sp_digit p1[26]; + sp_digit p2[26]; + sp_digit p3[26]; + sp_digit p4[26]; + sp_digit p5[26]; + sp_digit t0[26]; + sp_digit t1[26]; + sp_digit t2[26]; + sp_digit a0[13]; + sp_digit a1[13]; + sp_digit a2[13]; + (void)sp_4096_add_13(a0, a, &a[13]); + (void)sp_4096_add_13(a1, &a[13], &a[26]); + (void)sp_4096_add_13(a2, a0, &a[26]); + sp_4096_sqr_13(p0, a); + sp_4096_sqr_13(p2, &a[13]); + sp_4096_sqr_13(p4, &a[26]); + sp_4096_sqr_13(p1, a0); + sp_4096_sqr_13(p3, a1); + sp_4096_sqr_13(p5, a2); + XMEMSET(r, 0, sizeof(*r)*2U*39U); /* a is not read after this point */ + (void)sp_4096_sub_26(t0, p3, p2); + (void)sp_4096_sub_26(t1, p1, p2); + (void)sp_4096_sub_26(t2, p5, t0); + (void)sp_4096_sub_26(t2, t2, t1); + (void)sp_4096_sub_26(t0, t0, p4); + (void)sp_4096_sub_26(t1, t1, p0); + (void)sp_4096_add_26(r, r, p0); + (void)sp_4096_add_26(&r[13], &r[13], t1); + (void)sp_4096_add_26(&r[26], &r[26], t2); + (void)sp_4096_add_26(&r[39], &r[39], t0); + (void)sp_4096_add_26(&r[52], &r[52], p4); +} + +/* Add b to a into r. (r = a + b) + * Each of the 39 digits is added on its own; no carry is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[32] = a[32] + b[32]; + r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; + r[35] = a[35] + b[35]; + r[36] = a[36] + b[36]; + r[37] = a[37] + b[37]; + r[38] = a[38] + b[38]; + + return 0; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] + b[i + 0]; + r[i + 1] = a[i + 1] + b[i + 1]; + r[i + 2] = a[i + 2] + b[i + 2]; + r[i + 3] = a[i + 3] + b[i + 3]; + r[i + 4] = a[i + 4] + b[i + 4]; + r[i + 5] = a[i + 5] + b[i + 5]; + r[i + 6] = a[i + 6] + b[i + 6]; + r[i + 7] = a[i + 7] + b[i + 7]; + } + r[72] = a[72] + b[72]; + r[73] = a[73] + b[73]; + r[74] = a[74] + b[74]; + r[75] = a[75] + b[75]; + r[76] = a[76] + b[76]; + r[77] = a[77] + b[77]; + + return 0; +} + +/* Sub b from a into r. (r = a - b) + * Each of the 78 digits is subtracted on its own; no borrow is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[72] = a[72] - b[72]; + r[73] = a[73] - b[73]; + r[74] = a[74] - b[74]; + r[75] = a[75] - b[75]; + r[76] = a[76] - b[76]; + r[77] = a[77] - b[77]; + + return 0; +} + +/* Multiply a and b into r. (r = a * b) + * One Karatsuba step over 39-digit halves: z0 = lo*lo, z2 = hi*hi and + * z1 = (lo+hi)*(lo+hi) - z2 - z0, with z1 added in at digit offset 39. + * + * r A single precision integer; 156 digits are written. + * a A single precision integer of 78 digits. + * b A single precision integer of 78 digits. + */ +SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[78]; + sp_digit* a1 = z1; /* a1 borrows the first 39 digits of z1 */ + sp_digit b1[39]; + sp_digit* z2 = r + 78; + (void)sp_4096_add_39(a1, a, &a[39]); + (void)sp_4096_add_39(b1, b, &b[39]); + sp_4096_mul_39(z2, &a[39], &b[39]); + sp_4096_mul_39(z0, a, b); + sp_4096_mul_39(z1, a1, b1); /* output overlaps input a1; the unrolled sp_4096_mul_39 reads its inputs before clearing r */ + (void)sp_4096_sub_78(z1, z1, z2); + (void)sp_4096_sub_78(z1, z1, z0); + (void)sp_4096_add_78(r + 39, r + 39, z1); +} + +/* Square a and put result in r. 
(r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z1[78]; + sp_digit* a1 = z1; /* a1 borrows the first 39 digits of z1 */ + sp_digit* z2 = r + 78; + (void)sp_4096_add_39(a1, a, &a[39]); + sp_4096_sqr_39(z2, &a[39]); + sp_4096_sqr_39(z0, a); + sp_4096_sqr_39(z1, a1); /* output overlaps input a1 */ + (void)sp_4096_sub_78(z1, z1, z2); + (void)sp_4096_sub_78(z1, z1, z0); + (void)sp_4096_add_78(r + 39, r + 39, z1); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * Each of the 78 digits is added on its own; no carry is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * Each of the 78 digits is subtracted on its own; no borrow is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_mul_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[77]) * b[77]; + r[155] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; /* keep the low 53 bits of this column above the next column's sum */ + for (k = 153; k >= 0; k--) { /* columns are summed from the most significant downwards */ + for (i = 77; i >= 0; i--) { + j = k - i; + if (j >= 78) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +/* Square a and put result in r. (r = a * a) + * Column sums are built from the most significant digit downwards; each + * cross product is added doubled and the middle square term once. + * + * r A single precision integer; 156 digits are written. + * a A single precision integer of 78 digits. + */ +SP_NOINLINE static void sp_4096_sqr_78(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[77]) * a[77]; + r[155] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; + for (k = 153; k >= 0; k--) { + for (i = 77; i >= 0; i--) { + j = k - i; + if (j >= 78 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { /* the loop stopped on the diagonal: add the square term once */ + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +#endif /* WOLFSSL_SP_SMALL */ +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * Each of the 39 digits is added on its own; no carry is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#else +/* Sub b from a into r. (r = a - b) + * Unrolled form: each of the 39 digits is subtracted on its own and no + * borrow is propagated. + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + * return 0 always. + */ +SP_NOINLINE static int sp_4096_sub_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - b[i + 0]; + r[i + 1] = a[i + 1] - b[i + 1]; + r[i + 2] = a[i + 2] - b[i + 2]; + r[i + 3] = a[i + 3] - b[i + 3]; + r[i + 4] = a[i + 4] - b[i + 4]; + r[i + 5] = a[i + 5] - b[i + 5]; + r[i + 6] = a[i + 6] - b[i + 6]; + r[i + 7] = a[i + 7] - b[i + 7]; + } + r[32] = a[32] - b[32]; + r[33] = a[33] - b[33]; + r[34] = a[34] - b[34]; + r[35] = a[35] - b[35]; + r[36] = a[36] - b[36]; + r[37] = a[37] - b[37]; + r[38] = a[38] - b[38]; + + return 0; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * Column sums are built from the most significant digit downwards. + * + * r A single precision integer; 78 digits are written. + * a A single precision integer of 39 digits. + * b A single precision integer of 39 digits. + */ +SP_NOINLINE static void sp_4096_mul_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[38]) * b[38]; + r[77] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; /* keep the low 53 bits of this column above the next column's sum */ + for (k = 75; k >= 0; k--) { + for (i = 38; i >= 0; i--) { + j = k - i; + if (j >= 39) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * b[j]; + } + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_39(sp_digit* r, const sp_digit* a) +{ + int i, j, k; + int128_t c; + + c = ((int128_t)a[38]) * a[38]; + r[77] = (sp_digit)(c >> 53); + c = (c & 0x1fffffffffffffL) << 53; + for (k = 75; k >= 0; k--) { + for (i = 38; i >= 0; i--) { + j = k - i; + if (j >= 39 || i <= j) { + break; + } + if (j < 0) { + continue; + } + + c += ((int128_t)a[i]) * a[j] * 2; + } + if (i == j) { + c += ((int128_t)a[i]) * a[i]; + } + + r[k + 2] += c >> 106; + r[k + 1] = (c >> 53) & 0x1fffffffffffffL; + c = (c & 0x1fffffffffffffL) << 53; + } + r[0] = (sp_digit)(c >> 53); +} + +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* Calculate the bottom digit of -1/a mod 2^n. + * Only a[0] is read; the result is reduced to one 53-bit digit. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + x &= 0x1fffffffffffffL; + + /* rho = -1/m mod b; shift in sp_digit, as long may be only 32 bits wide */ + *rho = ((sp_digit)1 << 53) - x; +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_d_78(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 78; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[78] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL; /* each later digit is the low 53 bits of its product plus the previous product's high part, left unmasked */ + for (i = 0; i < 72; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[73]; + r[73] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[74]; + r[74] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[75]; + r[75] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[76]; + r[76] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[77]; + r[77] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[78] = (sp_digit)(t[5] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 2048 bits (39 digits: 38 of 53 bits plus a 34-bit top + * digit), just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_39(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. 
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<38; i++) { + r[i] = 0x1fffffffffffffL; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = 0x1fffffffffffffL; + r[i + 1] = 0x1fffffffffffffL; + r[i + 2] = 0x1fffffffffffffL; + r[i + 3] = 0x1fffffffffffffL; + r[i + 4] = 0x1fffffffffffffL; + r[i + 5] = 0x1fffffffffffffL; + r[i + 6] = 0x1fffffffffffffL; + r[i + 7] = 0x1fffffffffffffL; + } + r[32] = 0x1fffffffffffffL; + r[33] = 0x1fffffffffffffL; + r[34] = 0x1fffffffffffffL; + r[35] = 0x1fffffffffffffL; + r[36] = 0x1fffffffffffffL; + r[37] = 0x1fffffffffffffL; +#endif + r[38] = 0x3ffffffffL; /* top digit carries only 34 bits */ + + /* r = (2^n - 1) mod m */ + (void)sp_4096_sub_39(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * Digits are examined from the most significant down; once r is non-zero + * the mask becomes zero and later digits no longer change it. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=38; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[38] - b[38]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[37] - b[37]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[36] - b[36]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[35] - b[35]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[34] - b[34]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[33] - b[33]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[32] - b[32]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 24; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * Digits are subtracted independently; no borrow is propagated. + * + * r A single precision number representing conditional subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_4096_cond_sub_39(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[32] = a[32] - (b[32] & m); + r[33] = a[33] - (b[33] & m); + r[34] = a[34] - (b[34] & m); + r[35] = a[35] - (b[35] & m); + r[36] = a[36] - (b[36] & m); + r[37] = a[37] - (b[37] & m); + r[38] = a[38] - (b[38] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 39; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[39] += t; /* the final carry lands in the digit above the 39 processed */ +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; /* unlike the small build, digits of r are accumulated here without masking to 53 bits */ + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[33]; r[33] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[34]; r[34] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[35]; r[35] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[36]; r[36] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[37]; r[37] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[38]; r[38] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[39] += t[6] >> 53; /* high part of the last product goes into the digit above */ +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 53. + * + * a Array of sp_digit to normalize. 
+ */ +static void sp_4096_norm_39(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 38; i++) { /* a[38] only receives a carry; it is not masked */ + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } +#else + int i; + for (i = 0; i < 32; i += 8) { + a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL; + a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL; + a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL; + a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL; + a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL; + a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL; + a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL; + a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL; + a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL; /* a[i+8] is left masked, so the next pass carries zero out of it */ + } + a[32+1] += a[32] >> 53; + a[32] &= 0x1fffffffffffffL; + a[33+1] += a[33] >> 53; + a[33] &= 0x1fffffffffffffL; + a[34+1] += a[34] >> 53; + a[34] &= 0x1fffffffffffffL; + a[35+1] += a[35] >> 53; + a[35] &= 0x1fffffffffffffL; + a[36+1] += a[36] >> 53; + a[36] &= 0x1fffffffffffffL; + a[37+1] += a[37] >> 53; + a[37] &= 0x1fffffffffffffL; /* a[38] only receives a carry; it is not masked */ +#endif +} + +/* Shift the result in the high 2048 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
+ */ +static void sp_4096_mont_shift_39(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int128_t n = a[38] >> 34; /* drop the low 34 bits of digit 38 */ + n += ((int128_t)a[39]) << 19; /* 19 = 53 - 34: the next digit sits directly above the kept bits */ + + for (i = 0; i < 38; i++) { + r[i] = n & 0x1fffffffffffffL; + n >>= 53; + n += ((int128_t)a[40 + i]) << 19; + } + r[38] = (sp_digit)n; +#else + int i; + int128_t n = a[38] >> 34; /* drop the low 34 bits of digit 38 */ + n += ((int128_t)a[39]) << 19; /* 19 = 53 - 34: the next digit sits directly above the kept bits */ + for (i = 0; i < 32; i += 8) { + r[i + 0] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 40]) << 19; + r[i + 1] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 41]) << 19; + r[i + 2] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 42]) << 19; + r[i + 3] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 43]) << 19; + r[i + 4] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 44]) << 19; + r[i + 5] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 45]) << 19; + r[i + 6] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 46]) << 19; + r[i + 7] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 47]) << 19; + } + r[32] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[72]) << 19; + r[33] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[73]) << 19; + r[34] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[74]) << 19; + r[35] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[75]) << 19; + r[36] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[76]) << 19; + r[37] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[77]) << 19; + r[38] = (sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[39], 0, sizeof(*r) * 39U); /* clear the 39 digits above the shifted result */ +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + + for (i=0; i<38; i++) { + mu = (a[i] * mp) & 0x1fffffffffffffL; + sp_4096_mul_add_39(a+i, m, mu); + a[i+1] += a[i] >> 53; + } + mu = (a[i] * mp) & 0x3ffffffffL; /* the last digit uses a 34-bit mask rather than 53 */ + sp_4096_mul_add_39(a+i, m, mu); + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + + sp_4096_mont_shift_39(a, a); + sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] >> 34) > 0) ? /* subtract m only when bits above the 34-bit top digit are set */ + (sp_digit)1 : (sp_digit)0)); + sp_4096_norm_39(a); +} + +/* Multiply two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_mul_39(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_39(r, a, b); + sp_4096_mont_reduce_39(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_sqr_39(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_39(r, a); + sp_4096_mont_reduce_39(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_d_39(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 39; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[39] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL; + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[33]; + r[33] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[34]; + r[34] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[35]; + r[35] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[36]; + r[36] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[37]; + r[37] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[38]; + r[38] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[39] = (sp_digit)(t[6] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_4096_cond_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[32] = a[32] + (b[32] & m); + r[33] = a[33] + (b[33] & m); + r[34] = a[34] + (b[34] & m); + r[35] = a[35] + (b[35] & m); + r[36] = a[36] + (b[36] & m); + r[37] = a[37] + (b[37] & m); + r[38] = a[38] + (b[38] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_add_39(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 39; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, sp_digit* a, byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<38; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL; + } +#else + for (i=0; i<32; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 
0x1fffffffffffffL; + } + r[32] = ((a[32] >> n) | (a[33] << (53 - n))) & 0x1fffffffffffffL; + r[33] = ((a[33] >> n) | (a[34] << (53 - n))) & 0x1fffffffffffffL; + r[34] = ((a[34] >> n) | (a[35] << (53 - n))) & 0x1fffffffffffffL; + r[35] = ((a[35] >> n) | (a[36] << (53 - n))) & 0x1fffffffffffffL; + r[36] = ((a[36] >> n) | (a[37] << (53 - n))) & 0x1fffffffffffffL; + r[37] = ((a[37] >> n) | (a[38] << (53 - n))) & 0x1fffffffffffffL; +#endif + r[38] = a[38] >> n; +} + +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t, dv; + int128_t t0, t1; + + /* dv has 27 bits. */ + dv = (div >> 26) + 1; + /* All 53 bits from d1 and top 10 bits from d0. */ + d = (d1 << 10) | (d0 >> 43); + r = d / dv; + d -= r * dv; + /* Up to 36 bits in r */ + /* Next 17 bits from d0. */ + d <<= 17; + r <<= 17; + d |= (d0 >> 26) & ((1 << 17) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 53 bits in r */ + + /* Handle rounding error with dv - top part */ + t0 = ((int128_t)d1 << 53) + d0; + t1 = (int128_t)r * dv; + t1 = t0 - t1; + t = (sp_digit)(t1 >> 26) / dv; + r += t; + + /* Handle rounding error with dv - bottom 64 bits */ + t1 = (sp_digit)t0 - (r * dv); + t = (sp_digit)t1 / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_64 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. 
+ */ +static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* td; +#else + sp_digit t1d[78 + 1], t2d[39 + 1], sdd[39 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 39 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = td; + t2 = td + 78 + 1; + sd = t2 + 39 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_39(sd, d, 1L << 19); + sp_4096_mul_d_78(t1, a, 1L << 19); + dv = sd[38]; + for (i=39; i>=0; i--) { + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[39 + i]; + d1 <<= 53; + d1 += t1[39 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv); +#endif + + sp_4096_mul_d_39(t2, sd, r1); + (void)sp_4096_sub_39(&t1[i], &t1[i], t2); + t1[39 + i] -= t2[39]; + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; + r1 = (((-t1[39 + i]) << 53) - t1[39 + i - 1]) / dv; + r1 -= t1[39 + i]; + sp_4096_mul_d_39(t2, sd, r1); + (void)sp_4096_add_39(&t1[i], &t1[i], t2); + t1[39 + i] += t1[39 + i - 1] >> 53; + t1[39 + i - 1] &= 0x1fffffffffffffL; + } + t1[39 - 1] += t1[39 - 2] >> 53; + t1[39 - 2] &= 0x1fffffffffffffL; + d1 = t1[39 - 1]; + r1 = (sp_digit)(d1 / dv); + + sp_4096_mul_d_39(t2, sd, r1); + sp_4096_sub_39(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 39U); + for (i=0; i<37; i++) { + r[i+1] += r[i] >> 53; + r[i] &= 0x1fffffffffffffL; + } + sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ? 
+ (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_39(r); + sp_4096_rshift_39(r, r, 19); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_39(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_39(a, m, NULL, r); +} + +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 39U * 2U); + + norm = t[0] = td; + t[1] = &td[39 * 2]; + t[2] = &td[2 * 39 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 39U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 39 * 2); + sp_4096_mont_sqr_39(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 39 * 2); + } + + sp_4096_mont_reduce_39(t[0], m, mp); + n = sp_4096_cmp_39(t[0], m); + sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][78]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 39 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[39 * 2]; + t[2] = &td[2 * 39 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + else { + sp_4096_mul_39(t[1], a, norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_39(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_39(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_39(t[0], m, mp); + n = sp_4096_cmp_39(t[0], m); + sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][78]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[78]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 78, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 78; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_39(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_39(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_39(t[1], t[1], norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + else { + sp_4096_mul_39(t[1], a, norm); + err = sp_4096_mod_39(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_39(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_39(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_39(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_39(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_39(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_39(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_39(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_39(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_39(t[10], t[ 5], m, mp); + sp_4096_mont_mul_39(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_39(t[12], t[ 6], m, mp); + sp_4096_mont_mul_39(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_39(t[14], t[ 7], m, mp); + sp_4096_mont_mul_39(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_39(t[16], t[ 8], m, mp); + sp_4096_mont_mul_39(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_39(t[18], t[ 9], m, mp); + sp_4096_mont_mul_39(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_39(t[20], t[10], m, mp); + sp_4096_mont_mul_39(t[21], t[11], t[10], m, 
mp); + sp_4096_mont_sqr_39(t[22], t[11], m, mp); + sp_4096_mont_mul_39(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_39(t[24], t[12], m, mp); + sp_4096_mont_mul_39(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_39(t[26], t[13], m, mp); + sp_4096_mont_mul_39(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_39(t[28], t[14], m, mp); + sp_4096_mont_mul_39(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_39(t[30], t[15], m, mp); + sp_4096_mont_mul_39(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 39) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + sp_4096_mont_sqr_39(rt, rt, m, mp); + + sp_4096_mont_mul_39(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_39(rt, m, mp); + n = sp_4096_cmp_39(rt, m); + sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} + +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */ + +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A signle precision number. + */ +static void sp_4096_mont_norm_78(sp_digit* r, const sp_digit* m) +{ + /* Set r = 2^n - 1. 
*/ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<77; i++) { + r[i] = 0x1fffffffffffffL; + } +#else + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = 0x1fffffffffffffL; + r[i + 1] = 0x1fffffffffffffL; + r[i + 2] = 0x1fffffffffffffL; + r[i + 3] = 0x1fffffffffffffL; + r[i + 4] = 0x1fffffffffffffL; + r[i + 5] = 0x1fffffffffffffL; + r[i + 6] = 0x1fffffffffffffL; + r[i + 7] = 0x1fffffffffffffL; + } + r[72] = 0x1fffffffffffffL; + r[73] = 0x1fffffffffffffL; + r[74] = 0x1fffffffffffffL; + r[75] = 0x1fffffffffffffL; + r[76] = 0x1fffffffffffffL; +#endif + r[77] = 0x7fffL; + + /* r = (2^n - 1) mod n */ + (void)sp_4096_sub_78(r, r, m); + + /* Add one so r = 2^n mod m */ + r[0] += 1; +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=77; i>=0; i--) { + r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#else + int i; + + r |= (a[77] - b[77]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[76] - b[76]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[75] - b[75]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[74] - b[74]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[73] - b[73]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[72] - b[72]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + for (i = 64; i >= 0; i -= 8) { + r |= (a[i + 7] - b[i + 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 6] - b[i + 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 5] - b[i + 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 4] - b[i + 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 3] - b[i + 3]) & (0 - ((r == 0) ? 
(sp_digit)1 : (sp_digit)0)); + r |= (a[i + 2] - b[i + 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 1] - b[i + 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + r |= (a[i + 0] - b[i + 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0)); + } +#endif /* WOLFSSL_SP_SMALL */ + + return r; +} + +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static void sp_4096_cond_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - (b[i] & m); + } +#else + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] - (b[i + 0] & m); + r[i + 1] = a[i + 1] - (b[i + 1] & m); + r[i + 2] = a[i + 2] - (b[i + 2] & m); + r[i + 3] = a[i + 3] - (b[i + 3] & m); + r[i + 4] = a[i + 4] - (b[i + 4] & m); + r[i + 5] = a[i + 5] - (b[i + 5] & m); + r[i + 6] = a[i + 6] - (b[i + 6] & m); + r[i + 7] = a[i + 7] - (b[i + 7] & m); + } + r[72] = a[72] - (b[72] & m); + r[73] = a[73] - (b[73] & m); + r[74] = a[74] - (b[74] & m); + r[75] = a[75] - (b[75] & m); + r[76] = a[76] - (b[76] & m); + r[77] = a[77] - (b[77] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Mul a by scalar b and add into r. (r += a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 78; i++) { + t += (tb * a[i]) + r[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[78] += t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; + for (i = 0; i < 72; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[73]; r[73] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[74]; r[74] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[75]; r[75] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[76]; r[76] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[77]; r[77] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[78] += t[5] >> 53; +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Normalize the values in each word to 53. + * + * a Array of sp_digit to normalize. 
+ */ +static void sp_4096_norm_78(sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + for (i = 0; i < 77; i++) { + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } +#else + int i; + for (i = 0; i < 72; i += 8) { + a[i+1] += a[i+0] >> 53; a[i+0] &= 0x1fffffffffffffL; + a[i+2] += a[i+1] >> 53; a[i+1] &= 0x1fffffffffffffL; + a[i+3] += a[i+2] >> 53; a[i+2] &= 0x1fffffffffffffL; + a[i+4] += a[i+3] >> 53; a[i+3] &= 0x1fffffffffffffL; + a[i+5] += a[i+4] >> 53; a[i+4] &= 0x1fffffffffffffL; + a[i+6] += a[i+5] >> 53; a[i+5] &= 0x1fffffffffffffL; + a[i+7] += a[i+6] >> 53; a[i+6] &= 0x1fffffffffffffL; + a[i+8] += a[i+7] >> 53; a[i+7] &= 0x1fffffffffffffL; + a[i+9] += a[i+8] >> 53; a[i+8] &= 0x1fffffffffffffL; + } + a[72+1] += a[72] >> 53; + a[72] &= 0x1fffffffffffffL; + a[73+1] += a[73] >> 53; + a[73] &= 0x1fffffffffffffL; + a[74+1] += a[74] >> 53; + a[74] &= 0x1fffffffffffffL; + a[75+1] += a[75] >> 53; + a[75] &= 0x1fffffffffffffL; + a[76+1] += a[76] >> 53; + a[76] &= 0x1fffffffffffffL; +#endif +} + +/* Shift the result in the high 4096 bits down to the bottom. + * + * r A single precision number. + * a A single precision number. 
+ */ +static void sp_4096_mont_shift_78(sp_digit* r, const sp_digit* a) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + int128_t n = a[77] >> 15; + n += ((int128_t)a[78]) << 38; + + for (i = 0; i < 77; i++) { + r[i] = n & 0x1fffffffffffffL; + n >>= 53; + n += ((int128_t)a[79 + i]) << 38; + } + r[77] = (sp_digit)n; +#else + int i; + int128_t n = a[77] >> 15; + n += ((int128_t)a[78]) << 38; + for (i = 0; i < 72; i += 8) { + r[i + 0] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 79]) << 38; + r[i + 1] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 80]) << 38; + r[i + 2] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 81]) << 38; + r[i + 3] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 82]) << 38; + r[i + 4] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 83]) << 38; + r[i + 5] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 84]) << 38; + r[i + 6] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 85]) << 38; + r[i + 7] = n & 0x1fffffffffffffL; + n >>= 53; n += ((int128_t)a[i + 86]) << 38; + } + r[72] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[151]) << 38; + r[73] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[152]) << 38; + r[74] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[153]) << 38; + r[75] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[154]) << 38; + r[76] = n & 0x1fffffffffffffL; n >>= 53; n += ((int128_t)a[155]) << 38; + r[77] = (sp_digit)n; +#endif /* WOLFSSL_SP_SMALL */ + XMEMSET(&r[78], 0, sizeof(*r) * 78U); +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + int i; + sp_digit mu; + +#ifdef WOLFSSL_SP_DH + if (mp != 1) { + for (i=0; i<77; i++) { + mu = (a[i] * mp) & 0x1fffffffffffffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + } + mu = (a[i] * mp) & 0x7fffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } + else { + for (i=0; i<77; i++) { + mu = a[i] & 0x1fffffffffffffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + } + mu = a[i] & 0x7fffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; + } +#else + for (i=0; i<77; i++) { + mu = (a[i] * mp) & 0x1fffffffffffffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + } + mu = (a[i] * mp) & 0x7fffL; + sp_4096_mul_add_78(a+i, m, mu); + a[i+1] += a[i] >> 53; + a[i] &= 0x1fffffffffffffL; +#endif + + sp_4096_mont_shift_78(a, a); + sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] >> 15) > 0) ? + (sp_digit)1 : (sp_digit)0)); + sp_4096_norm_78(a); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_78(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_78(r, a, b); + sp_4096_mont_reduce_78(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_78(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_78(r, a); + sp_4096_mont_reduce_78(r, m, mp); +} + +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. 
+ * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_4096_mul_d_156(sp_digit* r, const sp_digit* a, + sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 156; i++) { + t += tb * a[i]; + r[i] = t & 0x1fffffffffffffL; + t >>= 53; + } + r[156] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1fffffffffffffL; + for (i = 0; i < 152; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + } + t[1] = tb * a[153]; + r[153] = (sp_digit)(t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + t[2] = tb * a[154]; + r[154] = (sp_digit)(t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + t[3] = tb * a[155]; + r[155] = (sp_digit)(t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + r[156] = (sp_digit)(t[3] >> 53); +#endif /* WOLFSSL_SP_SMALL */ +} + +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static void sp_4096_cond_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + (b[i] & m); + } +#else + int i; + + for (i = 0; i < 72; i += 8) { + r[i + 0] = a[i + 0] + (b[i + 0] & m); + r[i + 1] = a[i + 1] + (b[i + 1] & m); + r[i + 2] = a[i + 2] + (b[i + 2] & m); + r[i + 3] = a[i + 3] + (b[i + 3] & m); + r[i + 4] = a[i + 4] + (b[i + 4] & m); + r[i + 5] = a[i + 5] + (b[i + 5] & m); + r[i + 6] = a[i + 6] + (b[i + 6] & m); + r[i + 7] = a[i + 7] + (b[i + 7] & m); + } + r[72] = a[72] + (b[72] & m); + r[73] = a[73] + (b[73] & m); + r[74] = a[74] + (b[74] & m); + r[75] = a[75] + (b[75] & m); + r[76] = a[76] + (b[76] & m); + r[77] = a[77] + (b[77] & m); +#endif /* WOLFSSL_SP_SMALL */ +} + +#ifdef WOLFSSL_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static int sp_4096_sub_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] - b[i]; + } + + return 0; +} + +#endif +#ifdef WOLFSSL_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static int sp_4096_add_78(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + int i; + + for (i = 0; i < 78; i++) { + r[i] = a[i] + b[i]; + } + + return 0; +} +#endif +SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, sp_digit* a, byte n) +{ + int i; + +#ifdef WOLFSSL_SP_SMALL + for (i=0; i<77; i++) { + r[i] = ((a[i] >> n) | (a[i + 1] << (53 - n))) & 0x1fffffffffffffL; + } +#else + for (i=0; i<72; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (53 - n))) & 0x1fffffffffffffL; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (53 - n))) & 0x1fffffffffffffL; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (53 - n))) & 0x1fffffffffffffL; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (53 - n))) & 0x1fffffffffffffL; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (53 - n))) & 0x1fffffffffffffL; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (53 - n))) & 0x1fffffffffffffL; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (53 - n))) & 0x1fffffffffffffL; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (53 - n))) & 0x1fffffffffffffL; + } + r[72] = ((a[72] >> n) | (a[73] << (53 - n))) & 0x1fffffffffffffL; + r[73] = ((a[73] >> n) | (a[74] << (53 - n))) & 0x1fffffffffffffL; + r[74] = ((a[74] >> n) | (a[75] << (53 - n))) & 0x1fffffffffffffL; + r[75] = ((a[75] >> n) | (a[76] << (53 - n))) & 0x1fffffffffffffL; + r[76] = ((a[76] >> n) | (a[77] << (53 - n))) & 0x1fffffffffffffL; +#endif + r[77] = a[77] >> n; +} + +#ifdef WOLFSSL_SP_DIV_64 +static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0, + sp_digit dv) +{ + sp_digit d, r, t, dv; + int128_t t0, t1; + + /* dv has 27 bits. */ + dv = (div >> 26) + 1; + /* All 53 bits from d1 and top 10 bits from d0. */ + d = (d1 << 10) | (d0 >> 43); + r = d / dv; + d -= r * dv; + /* Up to 36 bits in r */ + /* Next 17 bits from d0. 
*/ + d <<= 17; + r <<= 17; + d |= (d0 >> 26) & ((1 << 17) - 1); + t = d / dv; + d -= t * dv; + r += t; + /* Up to 53 bits in r */ + + /* Handle rounding error with dv - top part */ + t0 = ((int128_t)d1 << 53) + d0; + t1 = (int128_t)r * dv; + t1 = t0 - t1; + t = (sp_digit)(t1 >> 26) / dv; + r += t; + + /* Handle rounding error with dv - bottom 64 bits */ + t1 = (sp_digit)t0 - (r * dv); + t = (sp_digit)t1 / dv; + r += t; + + return r; +} +#endif /* WOLFSSL_SP_DIV_64 */ + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + int i; +#ifndef WOLFSSL_SP_DIV_64 + int128_t d1; +#endif + sp_digit dv, r1; +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* td; +#else + sp_digit t1d[156 + 1], t2d[78 + 1], sdd[78 + 1]; +#endif + sp_digit* t1; + sp_digit* t2; + sp_digit* sd; + int err = MP_OKAY; + + (void)m; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 78 + 3), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + (void)m; + + if (err == MP_OKAY) { +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + t1 = td; + t2 = td + 156 + 1; + sd = t2 + 78 + 1; +#else + t1 = t1d; + t2 = t2d; + sd = sdd; +#endif + + sp_4096_mul_d_78(sd, d, 1L << 38); + sp_4096_mul_d_156(t1, a, 1L << 38); + dv = sd[77]; + for (i=78; i>=0; i--) { + t1[78 + i] += t1[78 + i - 1] >> 53; + t1[78 + i - 1] &= 0x1fffffffffffffL; +#ifndef WOLFSSL_SP_DIV_64 + d1 = t1[78 + i]; + d1 <<= 53; + d1 += t1[78 + i - 1]; + r1 = (sp_digit)(d1 / dv); +#else + r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv); +#endif + + 
sp_4096_mul_d_78(t2, sd, r1); + (void)sp_4096_sub_78(&t1[i], &t1[i], t2); + t1[78 + i] -= t2[78]; + t1[78 + i] += t1[78 + i - 1] >> 53; + t1[78 + i - 1] &= 0x1fffffffffffffL; + r1 = (((-t1[78 + i]) << 53) - t1[78 + i - 1]) / dv; + r1 -= t1[78 + i]; + sp_4096_mul_d_78(t2, sd, r1); + (void)sp_4096_add_78(&t1[i], &t1[i], t2); + t1[78 + i] += t1[78 + i - 1] >> 53; + t1[78 + i - 1] &= 0x1fffffffffffffL; + } + t1[78 - 1] += t1[78 - 2] >> 53; + t1[78 - 2] &= 0x1fffffffffffffL; + d1 = t1[78 - 1]; + r1 = (sp_digit)(d1 / dv); + + sp_4096_mul_d_78(t2, sd, r1); + sp_4096_sub_78(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2U * 78U); + for (i=0; i<76; i++) { + r[i+1] += r[i] >> 53; + r[i] &= 0x1fffffffffffffL; + } + sp_4096_cond_add_78(r, r, sd, 0 - ((r[77] < 0) ? + (sp_digit)1 : (sp_digit)0)); + + sp_4096_norm_78(r); + sp_4096_rshift_78(r, r, 38); + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. + */ +static int sp_4096_mod_78(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_78(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, int bits, + const sp_digit* m, int reduceA) +{ +#ifdef WOLFSSL_SP_SMALL + sp_digit* td; + sp_digit* t[3]; + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } + + if (err == MP_OKAY) { + XMEMSET(td, 0, sizeof(*td) * 3U * 78U * 2U); + + norm = t[0] = td; + t[1] = &td[78 * 2]; + t[2] = &td[2 * 78 * 2]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + } + else { + XMEMCPY(t[1], a, sizeof(sp_digit) * 78U); + } + } + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), + sizeof(*t[2]) * 78 * 2); + sp_4096_mont_sqr_78(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], + sizeof(*t[2]) * 78 * 2); + } + + sp_4096_mont_reduce_78(t[0], m, mp); + n = sp_4096_cmp_78(t[0], m); + sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); + + } + + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + + return err; +#elif defined(WOLFSSL_SP_CACHE_RESISTANT) +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[3][156]; +#else + sp_digit* td; + sp_digit* t[3]; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 78 * 2, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + t[0] = td; + t[1] = &td[78 * 2]; + t[2] = &td[2 * 78 * 2]; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + else { + sp_4096_mul_78(t[1], a, norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + i = bits / 53; + c = bits % 53; + n = e[i--] << (53 - c); + for (; ; c--) { + if (c == 0) { + if (i == -1) { + break; + } + + n = e[i--]; + c = 53; + } + + y = (n >> 52) & 1; + n <<= 1; + + sp_4096_mont_mul_78(t[y^1], t[0], t[1], m, mp); + + XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); + sp_4096_mont_sqr_78(t[2], t[2], m, mp); + XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); + } + + sp_4096_mont_reduce_78(t[0], m, mp); + n = sp_4096_cmp_78(t[0], m); + sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? 
+ (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, t[0], sizeof(t[0])); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][156]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit rt[156]; + sp_digit mp = 1; + sp_digit n; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 156, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 156; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + if (reduceA != 0) { + err = sp_4096_mod_78(t[1], a, m); + if (err == MP_OKAY) { + sp_4096_mul_78(t[1], t[1], norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + else { + sp_4096_mul_78(t[1], a, norm); + err = sp_4096_mod_78(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_78(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_78(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_78(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_78(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_78(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_78(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_78(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_78(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_78(t[10], t[ 5], m, mp); + sp_4096_mont_mul_78(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_78(t[12], t[ 6], m, mp); + sp_4096_mont_mul_78(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_78(t[14], t[ 7], m, mp); + sp_4096_mont_mul_78(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_78(t[16], t[ 8], m, mp); + sp_4096_mont_mul_78(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_78(t[18], t[ 9], m, mp); + sp_4096_mont_mul_78(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_78(t[20], t[10], m, mp); + sp_4096_mont_mul_78(t[21], t[11], t[10], 
m, mp); + sp_4096_mont_sqr_78(t[22], t[11], m, mp); + sp_4096_mont_mul_78(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_78(t[24], t[12], m, mp); + sp_4096_mont_mul_78(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_78(t[26], t[13], m, mp); + sp_4096_mont_mul_78(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_78(t[28], t[14], m, mp); + sp_4096_mont_mul_78(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_78(t[30], t[15], m, mp); + sp_4096_mont_mul_78(t[31], t[16], t[15], m, mp); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 78) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + XMEMCPY(rt, t[y], sizeof(rt)); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + sp_4096_mont_sqr_78(rt, rt, m, mp); + + sp_4096_mont_mul_78(rt, rt, t[y], m, mp); + } + + sp_4096_mont_reduce_78(rt, m, mp); + n = sp_4096_cmp_78(rt, m); + sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + XMEMCPY(r, rt, sizeof(rt)); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +#endif +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ + /* WOLFSSL_HAVE_SP_DH */ + +#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_4096_mask_39(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<39; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 32; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } + r[32] = a[32] & m; + r[33] = a[33] & m; + r[34] = a[34] & m; + r[35] = a[35] & m; + r[36] = a[36] & m; + r[37] = a[37] & m; + r[38] = a[38] & m; +#endif +} + +#endif +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */
+int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
+    byte* out, word32* outLen)
+{
+#ifdef WOLFSSL_SP_SMALL
+    sp_digit* d = NULL; /* single heap block carved into a, r and m */
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit* norm;
+    sp_digit e[1] = {0}; /* public exponent held in one digit */
+    sp_digit mp;
+    int i;
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) { /* exponent must fit in e[0] */
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) { /* only exactly 4096-bit moduli */
+            err = MP_READ_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        a = d; /* 156 digits */
+        r = a + 78 * 2; /* 156 digits */
+        m = r + 78 * 2; /* 78 digits */
+        norm = r; /* shares r; consumed before r is first written as result */
+
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) { /* a zero exponent is rejected */
+            err = MP_EXPTMOD_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        sp_4096_mont_setup(m, &mp);
+        sp_4096_mont_norm_78(norm, m);
+    }
+    if (err == MP_OKAY) {
+        sp_4096_mul_78(a, a, norm); /* base into Montgomery form */
+        err = sp_4096_mod_78(a, a, m);
+    }
+    if (err == MP_OKAY) {
+        for (i=52; i>=0; i--) { /* locate the top set bit of the exponent */
+            if ((e[0] >> i) != 0) {
+                break;
+            }
+        }
+
+        XMEMCPY(r, a, sizeof(sp_digit) * 78 * 2);
+        for (i--; i>=0; i--) { /* square-and-multiply over the remaining bits */
+            sp_4096_mont_sqr_78(r, r, m, mp);
+
+            if (((e[0] >> i) & 1) == 1) {
+                sp_4096_mont_mul_78(r, r, a, m, mp);
+            }
+        }
+        sp_4096_mont_reduce_78(r, m, mp);
+        mp = sp_4096_cmp_78(r, m); /* mp is reused to hold the comparison */
+        sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                    (sp_digit)1 : (sp_digit)0)- 1);
+
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+
+    return err;
+#else
+#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
+    sp_digit ad[156], md[78], rd[156];
+#else
+    sp_digit* d = NULL; /* single heap block carved into a, r and m */
+#endif
+    sp_digit* a;
+    sp_digit* m;
+    sp_digit* r;
+    sp_digit e[1] = {0}; /* public exponent held in one digit */
+    int err = MP_OKAY;
+
+    if (*outLen < 512U) {
+        err = MP_TO_E;
+    }
+    if (err == MP_OKAY) {
+        if (mp_count_bits(em) > 53) { /* exponent must fit in e[0] */
+            err = MP_READ_E;
+        }
+        if (inLen > 512U) {
+            err = MP_READ_E;
+        }
+        if (mp_count_bits(mm) != 4096) { /* only exactly 4096-bit moduli */
+            err = MP_READ_E;
+        }
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 5, NULL,
+                               DYNAMIC_TYPE_RSA);
+        if (d == NULL) {
+            err = MEMORY_E;
+        }
+    }
+
+    if (err == MP_OKAY) {
+        a = d; /* 156 digits */
+        r = a + 78 * 2; /* 156 digits */
+        m = r + 78 * 2; /* 78 digits */
+    }
+#else
+    a = ad;
+    m = md;
+    r = rd;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_4096_from_bin(a, 78, in, inLen);
+#if DIGIT_BIT >= 53
+        e[0] = (sp_digit)em->dp[0];
+#else
+        e[0] = (sp_digit)em->dp[0];
+        if (em->used > 1) {
+            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
+        }
+#endif
+        if (e[0] == 0) { /* a zero exponent is rejected */
+            err = MP_EXPTMOD_E;
+        }
+    }
+    if (err == MP_OKAY) {
+        sp_4096_from_mp(m, 78, mm);
+
+        if (e[0] == 0x3) { /* e == 3: one square and one multiply, no Montgomery form */
+            sp_4096_sqr_78(r, a);
+            err = sp_4096_mod_78(r, r, m);
+            if (err == MP_OKAY) {
+                sp_4096_mul_78(r, a, r);
+                err = sp_4096_mod_78(r, r, m);
+            }
+        }
+        else {
+            sp_digit* norm = r; /* shares r; consumed before r holds the result */
+            int i;
+            sp_digit mp;
+
+            sp_4096_mont_setup(m, &mp);
+            sp_4096_mont_norm_78(norm, m);
+
+            sp_4096_mul_78(a, a, norm); /* base into Montgomery form */
+            err = sp_4096_mod_78(a, a, m);
+
+            if (err == MP_OKAY) {
+                for (i=52; i>=0; i--) { /* locate the top set bit of the exponent */
+                    if ((e[0] >> i) != 0) {
+                        break;
+                    }
+                }
+
+                XMEMCPY(r, a, sizeof(sp_digit) * 156U);
+                for (i--; i>=0; i--) { /* square-and-multiply over the remaining bits */
+                    sp_4096_mont_sqr_78(r, r, m, mp);
+
+                    if (((e[0] >> i) & 1) == 1) {
+                        sp_4096_mont_mul_78(r, r, a, m, mp);
+                    }
+                }
+                sp_4096_mont_reduce_78(r, m, mp);
+                mp = sp_4096_cmp_78(r, m); /* mp is reused to hold the comparison */
+                sp_4096_cond_sub_78(r, r, m, ((mp < 0) ?
+                           (sp_digit)1 : (sp_digit)0) - 1);
+            }
+        }
+    }
+
+    if (err == MP_OKAY) {
+        sp_4096_to_bin(r, out);
+        *outLen = 512;
+    }
+
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
+    if (d != NULL) {
+        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
+    }
+#endif
+
+    return err;
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
+/* RSA private key operation.
+ *
+ * in      Array of bytes representing the number to exponentiate, base.
+ * inLen   Number of bytes in base.
+ * dm      Private exponent.
+ * pm      First prime.
+ * qm      Second prime.
+ * dpm     First prime's CRT exponent.
+ * dqm     Second prime's CRT exponent.
+ * qim     Inverse of second prime mod p.
+ * mm      Modulus.
+ * out     Buffer to hold big-endian bytes of exponentiation result.
+ *         Must be at least 512 bytes long.
+ * outLen  Number of bytes in result.
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
+ * an array is too long and MEMORY_E when dynamic memory allocation fails.
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM) +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* a; + sp_digit* d = NULL; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 78 * 4, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = d + 78; + m = a + 78; + r = a; + + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(d, 78, dm); + sp_4096_from_mp(m, 78, mm); + err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0); + } + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (d != NULL) { + XMEMSET(d, 0, sizeof(sp_digit) * 78); + XFREE(d, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[156], d[78], m[78]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)pm; + (void)qm; + (void)dpm; + (void)dqm; + (void)qim; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (mp_count_bits(dm) > 4096) { + err = MP_READ_E; + } + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(d, 78, dm); + sp_4096_from_mp(m, 78, mm); + err = sp_4096_mod_exp_78(r, a, d, 4096, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(d, 0, sizeof(sp_digit) * 78); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#else 
+#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + sp_digit* t = NULL; + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 39 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) { + err = MEMORY_E; + } + } + if (err == MP_OKAY) { + a = t; + p = a + 78 * 2; + q = p + 39; + qi = dq = dp = q + 39; + tmpa = qi + 39; + tmpb = tmpa + 78; + + tmp = t; + r = tmp + 78; + + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(p, 39, pm); + sp_4096_from_mp(q, 39, qm); + sp_4096_from_mp(dp, 39, dpm); + err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 39, dqm); + err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1); + } + if (err == MP_OKAY) { + (void)sp_4096_sub_39(tmpa, tmpa, tmpb); + sp_4096_mask_39(tmp, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + (void)sp_4096_add_39(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 39, qim); + sp_4096_mul_39(tmpa, tmpa, qi); + err = sp_4096_mod_39(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_39(tmpa, q, tmpa); + (void)sp_4096_add_78(r, tmpb, tmpa); + sp_4096_norm_78(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 39 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } + + return err; +#else + sp_digit a[78 * 2]; + sp_digit p[39], q[39], dp[39], dq[39], qi[39]; + sp_digit tmp[78], tmpa[78], tmpb[78]; + sp_digit* r = a; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512U) { + err = MP_TO_E; + } + if (err == MP_OKAY) { + if (inLen > 512U) { + err = MP_READ_E; + } + if (mp_count_bits(mm) != 4096) { + err = 
MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 78, in, inLen); + sp_4096_from_mp(p, 39, pm); + sp_4096_from_mp(q, 39, qm); + sp_4096_from_mp(dp, 39, dpm); + sp_4096_from_mp(dq, 39, dqm); + sp_4096_from_mp(qi, 39, qim); + + err = sp_4096_mod_exp_39(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + err = sp_4096_mod_exp_39(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + (void)sp_4096_sub_39(tmpa, tmpa, tmpb); + sp_4096_mask_39(tmp, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + (void)sp_4096_add_39(tmpa, tmpa, tmp); + sp_4096_mul_39(tmpa, tmpa, qi); + err = sp_4096_mod_39(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_39(tmpa, tmpa, q); + (void)sp_4096_add_78(r, tmpb, tmpa); + sp_4096_norm_78(r); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + + XMEMSET(tmpa, 0, sizeof(tmpa)); + XMEMSET(tmpb, 0, sizeof(tmpb)); + XMEMSET(p, 0, sizeof(p)); + XMEMSET(q, 0, sizeof(q)); + XMEMSET(dp, 0, sizeof(dp)); + XMEMSET(dq, 0, sizeof(dq)); + XMEMSET(qi, 0, sizeof(qi)); + + return err; +#endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ +#endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ +} + +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
+ */
+static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
+{
+    int err;
+
+    err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
+    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
+#if DIGIT_BIT == 53
+        XMEMCPY(r->dp, a, sizeof(sp_digit) * 78); /* same digit width: straight copy */
+        r->used = 78;
+        mp_clamp(r);
+#elif DIGIT_BIT < 53
+        int i, j = 0, s = 0; /* i: source digit, j: destination digit, s: bit offset */
+
+        r->dp[0] = 0;
+        for (i = 0; i < 78; i++) { /* each 53-bit digit is spread over several mp digits */
+            r->dp[j] |= a[i] << s;
+            r->dp[j] &= (1L << DIGIT_BIT) - 1;
+            s = DIGIT_BIT - s;
+            r->dp[++j] = a[i] >> s;
+            while (s + DIGIT_BIT <= 53) {
+                s += DIGIT_BIT;
+                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
+                if (s == SP_WORD_SIZE) { /* avoids a shift by the full word size */
+                    r->dp[j] = 0;
+                }
+                else {
+                    r->dp[j] = a[i] >> s;
+                }
+            }
+            s = 53 - s;
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#else
+        int i, j = 0, s = 0; /* i: source digit, j: destination digit, s: bit offset */
+
+        r->dp[0] = 0;
+        for (i = 0; i < 78; i++) { /* several 53-bit digits are packed into each mp digit */
+            r->dp[j] |= ((mp_digit)a[i]) << s;
+            if (s + 53 >= DIGIT_BIT) { /* current mp digit is full: spill the rest into the next */
+    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
+                r->dp[j] &= (1L << DIGIT_BIT) - 1;
+    #endif
+                s = DIGIT_BIT - s;
+                r->dp[++j] = a[i] >> s;
+                s = 53 - s;
+            }
+            else {
+                s += 53;
+            }
+        }
+        r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
+        mp_clamp(r);
+#endif
+    }
+
+    return err;
+}
+
+/* Perform the modular exponentiation for Diffie-Hellman.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if there are too many bytes in an array
+ * and MEMORY_E if memory allocation fails.
+ */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + + sp_4096_from_mp(b, 78, base); + sp_4096_from_mp(e, 78, exp); + sp_4096_from_mp(m, 78, mod); + + err = sp_4096_mod_exp_78(r, b, e, mp_count_bits(exp), m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } + return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[156], ed[78], md[78]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + int err = MP_OKAY; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 78, base); + sp_4096_from_mp(e, 78, exp); + sp_4096_from_mp(m, 78, mod); + + err = sp_4096_mod_exp_78(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = 
sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_DH); +#endif + + return err; +#endif +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +SP_NOINLINE static void sp_4096_lshift_78(sp_digit* r, sp_digit* a, byte n) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + r[78] = a[77] >> (53 - n); + for (i=77; i>0; i--) { + r[i] = ((a[i] << n) | (a[i-1] >> (53 - n))) & 0x1fffffffffffffL; + } +#else + sp_int_digit s, t; + + s = (sp_int_digit)a[77]; + r[78] = s >> (53U - n); + s = (sp_int_digit)(a[77]); t = (sp_int_digit)(a[76]); + r[77] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[76]); t = (sp_int_digit)(a[75]); + r[76] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[75]); t = (sp_int_digit)(a[74]); + r[75] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[74]); t = (sp_int_digit)(a[73]); + r[74] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[73]); t = (sp_int_digit)(a[72]); + r[73] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[72]); t = (sp_int_digit)(a[71]); + r[72] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[71]); t = (sp_int_digit)(a[70]); + r[71] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[70]); t = (sp_int_digit)(a[69]); + r[70] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[69]); t = (sp_int_digit)(a[68]); + r[69] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[68]); t = (sp_int_digit)(a[67]); + r[68] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[67]); t = (sp_int_digit)(a[66]); + r[67] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[66]); t = (sp_int_digit)(a[65]); + r[66] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = 
(sp_int_digit)(a[65]); t = (sp_int_digit)(a[64]); + r[65] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[64]); t = (sp_int_digit)(a[63]); + r[64] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[63]); t = (sp_int_digit)(a[62]); + r[63] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[62]); t = (sp_int_digit)(a[61]); + r[62] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[61]); t = (sp_int_digit)(a[60]); + r[61] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[60]); t = (sp_int_digit)(a[59]); + r[60] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[59]); t = (sp_int_digit)(a[58]); + r[59] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[58]); t = (sp_int_digit)(a[57]); + r[58] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[57]); t = (sp_int_digit)(a[56]); + r[57] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[56]); t = (sp_int_digit)(a[55]); + r[56] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[55]); t = (sp_int_digit)(a[54]); + r[55] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[54]); t = (sp_int_digit)(a[53]); + r[54] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[53]); t = (sp_int_digit)(a[52]); + r[53] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[52]); t = (sp_int_digit)(a[51]); + r[52] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[51]); t = (sp_int_digit)(a[50]); + r[51] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[50]); t = (sp_int_digit)(a[49]); + r[50] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[49]); t = (sp_int_digit)(a[48]); + r[49] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; 
+ s = (sp_int_digit)(a[48]); t = (sp_int_digit)(a[47]); + r[48] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[47]); t = (sp_int_digit)(a[46]); + r[47] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[46]); t = (sp_int_digit)(a[45]); + r[46] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[45]); t = (sp_int_digit)(a[44]); + r[45] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[44]); t = (sp_int_digit)(a[43]); + r[44] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[43]); t = (sp_int_digit)(a[42]); + r[43] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[42]); t = (sp_int_digit)(a[41]); + r[42] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]); + r[41] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]); + r[40] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]); + r[39] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]); + r[38] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]); + r[37] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]); + r[36] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]); + r[35] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]); + r[34] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]); + r[33] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]); + r[32] = ((s << n) | (t >> (53U - n))) & 
0x1fffffffffffffUL; + s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]); + r[31] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]); + r[30] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]); + r[29] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]); + r[28] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]); + r[27] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]); + r[26] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]); + r[25] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]); + r[24] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]); + r[23] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]); + r[22] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]); + r[21] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]); + r[20] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]); + r[19] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]); + r[18] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]); + r[17] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]); + r[16] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]); + r[15] = ((s << n) | (t >> (53U - 
n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]); + r[14] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]); + r[13] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]); + r[12] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]); + r[11] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]); + r[10] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]); + r[9] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]); + r[8] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]); + r[7] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]); + r[6] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]); + r[5] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]); + r[4] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]); + r[3] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]); + r[2] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; + s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]); + r[1] = ((s << n) | (t >> (53U - n))) & 0x1fffffffffffffUL; +#endif + r[0] = (a[0] << n) & 0x1fffffffffffffL; +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. 
+ * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[156]; + sp_digit td[79]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 235, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 156; +#else + norm = nd; + tmp = td; +#endif + + XMEMSET(tmp, 0, sizeof(sp_digit) * 79); /* sizeof(td) is only a pointer's size when td is malloc'd */ + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_78(norm, m); + + bits = ((bits + 4) / 5) * 5; + i = ((bits + 52) / 53) - 1; + c = bits % 53; + if (c == 0) { + c = 53; + } + if (i < 78) { + n = e[i--] << (64 - c); + } + else { + n = 0; + i--; + } + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + sp_4096_lshift_78(r, norm, y); + for (; i>=0 || c>=5; ) { + if (c < 5) { + n |= e[i--] << (11 - c); + c += 53; + } + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + sp_4096_mont_sqr_78(r, r, m, mp); + + sp_4096_lshift_78(r, r, y); + sp_4096_mul_d_78(tmp, norm, (r[78] << 38) + (r[77] >> 15)); + r[78] = 0; + r[77] &= 0x7fffL; + (void)sp_4096_add_78(r, r, tmp); + sp_4096_norm_78(r); + o = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((o < 0) ? + (sp_digit)1 : (sp_digit)0) - 1); + } + + sp_4096_mont_reduce_78(r, m, mp); + n = sp_4096_cmp_78(r, m); + sp_4096_cond_sub_78(r, r, m, ((n < 0) ?
+ (sp_digit)1 : (sp_digit)0) - 1); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if base or exp is too long or mod is not + * 4096 bits, and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ +#ifdef WOLFSSL_SP_SMALL + int err = MP_OKAY; + sp_digit* d = NULL; + sp_digit* b; + sp_digit* e; + sp_digit* m; + sp_digit* r; + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) { + err = MEMORY_E; + } + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + + sp_4096_from_mp(b, 78, base); + sp_4096_from_bin(e, 78, exp, expLen); + sp_4096_from_mp(m, 78, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && + ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) { + err = sp_4096_mod_exp_2_78(r, e, expLen * 8, m); + } + else + #endif + err = sp_4096_mod_exp_78(r, b, e, expLen * 8, m, 0); + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (d != NULL) { + XMEMSET(e, 0, sizeof(sp_digit) * 78U); + XFREE(d, NULL, DYNAMIC_TYPE_DH); + } +
return err; +#else +#ifndef WOLFSSL_SMALL_STACK + sp_digit bd[156], ed[78], md[78]; +#else + sp_digit* d = NULL; +#endif + sp_digit* b; + sp_digit* e = NULL; /* stays NULL if small-stack setup fails */ + sp_digit* m; + sp_digit* r; + word32 i; + int err = MP_OKAY; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512U) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } +#ifdef WOLFSSL_SMALL_STACK + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(*d) * 78 * 4, NULL, DYNAMIC_TYPE_DH); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + b = d; + e = b + 78 * 2; + m = e + 78; + r = b; + } +#else + r = b = bd; + e = ed; + m = md; +#endif + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 78, base); + sp_4096_from_bin(e, 78, exp, expLen); + sp_4096_from_mp(m, 78, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2U && + ((m[77] << 17) | (m[76] >> 36)) == 0xffffffffL) { + err = sp_4096_mod_exp_2_78(r, e, expLen * 8U, m); + } + else { + #endif + err = sp_4096_mod_exp_78(r, b, e, expLen * 8U, m, 0); + #ifdef HAVE_FFDHE_4096 + } + #endif + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512U && out[i] == 0U; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + if (e != NULL) { XMEMSET(e, 0, sizeof(sp_digit) * 78U); } /* wipe exponent copy; e is unset on early error */ + +#ifdef WOLFSSL_SMALL_STACK + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_DH); +#endif + + return err; +#endif +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index d760cc097..57b68bb10 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -1391,7 +1391,7 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const
sp_digit* a, u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Square a and put result in r. (r = a * a) @@ -1418,7 +1418,7 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) u += sp_2048_add_16(r + 8, r + 8, z1); r[24] = u; XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1)); - sp_2048_add_16(r + 16, r + 16, z2); + (void)sp_2048_add_16(r + 16, r + 16, z2); } /* Sub b from a into r. (r = a - b) @@ -1780,7 +1780,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Square a and put result in r. (r = a * a) @@ -1807,7 +1807,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) u += sp_2048_add_32(r + 16, r + 16, z1); r[48] = u; XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1)); - sp_2048_add_32(r + 32, r + 32, z2); + (void)sp_2048_add_32(r + 32, r + 32, z2); } /* Sub b from a into r. (r = a - b) @@ -2438,7 +2438,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } /* Square a and put result in r. 
(r = a * a) @@ -2465,7 +2465,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) u += sp_2048_add_64(r + 32, r + 32, z1); r[96] = u; XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); - sp_2048_add_64(r + 64, r + 64, z2); + (void)sp_2048_add_64(r + 64, r + 64, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -6087,7 +6087,7 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Square a and put result in r. (r = a * a) @@ -6114,7 +6114,7 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) u += sp_3072_add_24(r + 12, r + 12, z1); r[36] = u; XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1)); - sp_3072_add_24(r + 24, r + 24, z2); + (void)sp_3072_add_24(r + 24, r + 24, z2); } /* Sub b from a into r. (r = a - b) @@ -6617,7 +6617,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Square a and put result in r. (r = a * a) @@ -6644,7 +6644,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) u += sp_3072_add_48(r + 24, r + 24, z1); r[72] = u; XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1)); - sp_3072_add_48(r + 48, r + 48, z2); + (void)sp_3072_add_48(r + 48, r + 48, z2); } /* Sub b from a into r. (r = a - b) @@ -7542,7 +7542,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } /* Square a and put result in r. 
(r = a * a) @@ -7569,7 +7569,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) u += sp_3072_add_96(r + 48, r + 48, z1); r[144] = u; XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1)); - sp_3072_add_96(r + 96, r + 96, z2); + (void)sp_3072_add_96(r + 96, r + 96, z2); } #endif /* !WOLFSSL_SP_SMALL */ @@ -10724,6 +10724,4181 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r, 32 bits per digit. + * + * r A single precision integer. + * size Number of digits in r; every digit up to size is written. + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { /* last byte of a is least significant */ + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 24U) { /* byte reached bit 31: digit j is complete */ + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { /* zero the remaining high digits */ + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit, 32 bits per digit. + * + * r A single precision integer. + * size Number of digits in r; digits not set from a are zeroed. + * a A multi-precision integer.
+ */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 32 /* digit widths match: copy, then zero the rest */ + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 32 /* split each wider mp_digit across 32-bit digits */ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffff; + s = 32U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 32U) <= (word32)DIGIT_BIT) { + s += 32U; + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else /* DIGIT_BIT < 32: pack narrower mp_digits into 32-bit digits */ + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 32) { + r[j] &= 0xffffffff; + if (j + 1 >= size) { + break; + } + s = 32 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array.
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; + a[j] = 0; + for (i=0; i<128 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/ + if (j < 0) { + break; + } + while (b < 32) { + a[j--] = r[i] >> b; b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 32); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +#ifndef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adds r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" 
+ "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, 
[%[b], #116]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr 
r5, [%[b], #56]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), 
[r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* Sub b from a in place. (a = a - b) + * + * a A single precision integer; overwritten with the result. + * b A single precision integer. + * returns the final borrow as produced by sbc c, c, c. + */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t"
+ "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, r5, %[c]\n\t" + "ldr r3, 
[%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], 
#72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, 
[%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, 
[%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], #100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "mov r5, #0\n\t" + "sub r5, r5, %[c]\n\t" + "ldr r3, [%[a], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #0]\n\t" + "str r4, [%[a], #4]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r6, [%[b], #12]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #8]\n\t" + "str r4, [%[a], #12]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r6, [%[b], #20]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r6, [%[b], #28]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + 
"str r3, [%[a], #24]\n\t" + "str r4, [%[a], #28]\n\t" + "ldr r3, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r6, [%[b], #36]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #32]\n\t" + "str r4, [%[a], #36]\n\t" + "ldr r3, [%[a], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r6, [%[b], #44]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #40]\n\t" + "str r4, [%[a], #44]\n\t" + "ldr r3, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r6, [%[b], #52]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #48]\n\t" + "str r4, [%[a], #52]\n\t" + "ldr r3, [%[a], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r6, [%[b], #60]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #56]\n\t" + "str r4, [%[a], #60]\n\t" + "ldr r3, [%[a], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r6, [%[b], #68]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #64]\n\t" + "str r4, [%[a], #68]\n\t" + "ldr r3, [%[a], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r6, [%[b], #76]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #72]\n\t" + "str r4, [%[a], #76]\n\t" + "ldr r3, [%[a], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r6, [%[b], #84]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #80]\n\t" + "str r4, [%[a], #84]\n\t" + "ldr r3, [%[a], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r6, [%[b], #92]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #88]\n\t" + "str r4, [%[a], #92]\n\t" + "ldr r3, [%[a], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r6, [%[b], #100]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #96]\n\t" + "str r4, [%[a], 
#100]\n\t" + "ldr r3, [%[a], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r6, [%[b], #108]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #104]\n\t" + "str r4, [%[a], #108]\n\t" + "ldr r3, [%[a], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r6, [%[b], #116]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #112]\n\t" + "str r4, [%[a], #116]\n\t" + "ldr r3, [%[a], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r6, [%[b], #124]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a], #120]\n\t" + "str r4, [%[a], #124]\n\t" + "sbc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + + return c; +} + +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r7, #0\n\t" + "mvn r7, r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adds r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], 
#28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, 
[%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], 
#44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + 
"adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" 
+ "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add 
%[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[b], #0]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b], #4]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[b], #8]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[b], #12]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[b], #16]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[b], #20]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[b], #24]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[b], #28]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[b], #32]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[b], #36]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[b], #40]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #40]\n\t" + "ldr r4, [%[a], #44]\n\t" + "ldr r5, [%[b], #44]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[b], #48]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r5, [%[b], #52]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[b], #56]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #56]\n\t" + "ldr r4, [%[a], #60]\n\t" + "ldr r5, [%[b], #60]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[b], #64]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a], #68]\n\t" + "ldr r5, [%[b], #68]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, 
[%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[b], #72]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #72]\n\t" + "ldr r4, [%[a], #76]\n\t" + "ldr r5, [%[b], #76]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[b], #80]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #80]\n\t" + "ldr r4, [%[a], #84]\n\t" + "ldr r5, [%[b], #84]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[b], #88]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #88]\n\t" + "ldr r4, [%[a], #92]\n\t" + "ldr r5, [%[b], #92]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[b], #96]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #96]\n\t" + "ldr r4, [%[a], #100]\n\t" + "ldr r5, [%[b], #100]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[b], #104]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #104]\n\t" + "ldr r4, [%[a], #108]\n\t" + "ldr r5, [%[b], #108]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[b], #112]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #112]\n\t" + "ldr r4, [%[a], #116]\n\t" + "ldr r5, [%[b], #116]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[b], #120]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #120]\n\t" + "ldr r4, [%[a], #124]\n\t" + "ldr r5, [%[b], #124]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r], #124]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r7" + ); + + return c; +} + +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */
+/* Implementation notes for sp_4096_mul_64:
+ *  - The product is built one 32-bit output word (column) at a time. For the
+ *    column at byte offset k every a[i] * b[j] with i + j = k is added into
+ *    the three word accumulator r3:r4:r5, the low word is stored and the
+ *    accumulator is shifted down by one word.
+ *  - The result is formed in the local array tmp (128 words) and copied to r
+ *    afterwards, so the asm itself never writes through r.
+ *  - The %[r], %[a] and %[b] registers are reused as scratch inside the loops
+ *    and hold their entry values again when the statement ends.
+ *  - "cc" is listed as a clobber because the code changes the condition flags.
+ */
+SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit tmp[64 * 2];
+    __asm__ __volatile__ (
+        /* r3:r4 = running accumulator, r8 = k (byte offset of output word) */
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "mov r8, r3\n\t"
+        /* Keep the base pointers: r11 = tmp, r9 = a, r10 = b */
+        "mov r11, %[r]\n\t"
+        "mov r9, %[a]\n\t"
+        "mov r10, %[b]\n\t"
+        /* r12 = a + 256 bytes, one past the last word of a */
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, r9\n\t"
+        "mov r12, r6\n\t"
+        /* Outer loop: one pass per output word */
+        "\n1:\n\t"
+        /* %[r] is used as a constant zero for the carry additions */
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        /* %[a] = max(k - 252, 0): offset of the first word of a to use */
+        "mov r6, #252\n\t"
+        "mov %[a], r8\n\t"
+        "subs %[a], %[a], r6\n\t"
+        "sbc r6, r6, r6\n\t"
+        "mvn r6, r6\n\t"
+        "and %[a], %[a], r6\n\t"
+        /* %[b] = k - %[a]: offset of the matching word of b */
+        "mov %[b], r8\n\t"
+        "sub %[b], %[b], %[a]\n\t"
+        /* Turn both offsets into addresses */
+        "add %[a], %[a], r9\n\t"
+        "add %[b], %[b], r10\n\t"
+        /* Inner loop: walk a upwards and b downwards */
+        "\n2:\n\t"
+        /* Multiply Start */
+        "ldr r6, [%[a]]\n\t"
+        "ldr r7, [%[b]]\n\t"
+        "umull r6, r7, r6, r7\n\t"
+        "adds r3, r3, r6\n\t"
+        "adcs r4, r4, r7\n\t"
+        "adc r5, r5, %[r]\n\t"
+        /* Multiply Done */
+        "add %[a], %[a], #4\n\t"
+        "sub %[b], %[b], #4\n\t"
+        /* Column finished when a runs off its end ... */
+        "cmp %[a], r12\n\t"
+        "beq 3f\n\t"
+        /* ... or when a has passed word k */
+        "mov r6, r8\n\t"
+        "add r6, r6, r9\n\t"
+        "cmp %[a], r6\n\t"
+        "ble 2b\n\t"
+        "\n3:\n\t"
+        /* Store the finished word at tmp + k and shift the accumulator */
+        "mov %[r], r11\n\t"
+        "mov r7, r8\n\t"
+        "str r3, [%[r], r7]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        /* k += 4; continue while k <= 504 */
+        "add r7, r7, #4\n\t"
+        "mov r8, r7\n\t"
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, r6, #248\n\t"
+        "cmp r7, r6\n\t"
+        "ble 1b\n\t"
+        /* Top word of the product (byte offset 508) */
+        "str r3, [%[r], r7]\n\t"
+        /* Put the operand registers back to their entry values */
+        "mov %[a], r9\n\t"
+        "mov %[b], r10\n\t"
+        :
+        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
+          "cc"
+    );
+
+    XMEMCPY(r, tmp, sizeof(tmp));
+}
+
+/* AND m into each word of a and store in r.
+ *
+ * r  A single precision integer. Receives the 64 masked digits.
+ * a  A single precision integer. 64 digits are read.
+ * m  Mask to AND against each digit.
+ */
+static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int i; /* digit index */
+
+    for (i=0; i<64; i++) { /* small build: one digit per iteration */
+        r[i] = a[i] & m;
+    }
+#else
+    int i; /* index of the first digit of each group of eight */
+
+    for (i = 0; i < 64; i += 8) { /* same result, eight digits per iteration */
+        r[i+0] = a[i+0] & m;
+        r[i+1] = a[i+1] & m;
+        r[i+2] = a[i+2] & m;
+        r[i+3] = a[i+3] & m;
+        r[i+4] = a[i+4] & m;
+        r[i+5] = a[i+5] & m;
+        r[i+6] = a[i+6] & m;
+        r[i+7] = a[i+7] & m;
+    }
+#endif
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * Each 128 digit operand is split into a low and a high half of 64 digits and
+ * the product is assembled from three half size products:
+ *   z0 = a_lo * b_lo, z2 = a_hi * b_hi, z1 = (a_lo + a_hi) * (b_lo + b_hi)
+ * z1 - z2 - z0 is then added in at digit 64 and z2 at digit 128.
+ * The order of the calls matters because z0 is r itself and r + 128 is used
+ * as working space before z2 is added.
+ *
+ * NOTE(review): the half size work is done by sp_2048_*_64 helpers that are
+ * defined outside this part of the file; the description above assumes they
+ * add (returning the carry), multiply and mask as their names suggest, and
+ * that they are always compiled in when this function is - confirm.
+ *
+ * r  A single precision integer. Must have room for 256 digits.
+ * a  A single precision integer. 128 digits are read.
+ * b  A single precision integer. 128 digits are read.
+ */
+SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
+        const sp_digit* b)
+{
+    sp_digit* z0 = r; /* low product is written straight into the result */
+    sp_digit z1[128];
+    sp_digit a1[64];
+    sp_digit b1[64];
+    sp_digit z2[128];
+    sp_digit u, ca, cb;
+
+    ca = sp_2048_add_64(a1, a, &a[64]); /* a1 = a_lo + a_hi */
+    cb = sp_2048_add_64(b1, b, &b[64]); /* b1 = b_lo + b_hi */
+    u  = ca & cb; /* set only when both sums produced a carry */
+    sp_2048_mul_64(z1, a1, b1);
+    sp_2048_mul_64(z2, &a[64], &b[64]);
+    sp_2048_mul_64(z0, a, b);
+    sp_2048_mask_64(r + 128, a1, 0 - cb); /* a1 if b1 carried, else zero */
+    sp_2048_mask_64(b1, b1, 0 - ca); /* b1 if a1 carried, else zero */
+    u += sp_2048_add_64(r + 128, r + 128, b1);
+    u += sp_4096_sub_in_place_128(z1, z2); /* z1 -= z2 */
+    u += sp_4096_sub_in_place_128(z1, z0); /* z1 -= z0 */
+    u += sp_4096_add_128(r + 64, r + 64, z1); /* middle term at digit 64 */
+    r[192] = u;
+    XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); /* clear r[193..255] */
+    (void)sp_4096_add_128(r + 128, r + 128, z2); /* high term at digit 128 */
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A single precision integer. Receives 128 words.
+ * a  A single precision integer. 64 words are read.
+ */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #252\n\t" + "mov %[a], r8\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "umull r6, r7, r6, r7\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r7, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + "add r7, r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, 
r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[128]; + sp_digit z1[128]; + sp_digit a1[64]; + sp_digit u; + + u = sp_2048_add_64(a1, a, &a[64]); + sp_2048_sqr_64(z1, a1); + sp_2048_sqr_64(z2, &a[64]); + sp_2048_sqr_64(z0, a); + sp_2048_mask_64(r + 128, a1, 0 - u); + u += sp_2048_add_64(r + 128, r + 128, r + 128); + u += sp_4096_sub_in_place_128(z1, z2); + u += sp_4096_sub_in_place_128(z1, z0); + u += sp_4096_add_128(r + 64, r + 64, z1); + r[192] = u; + XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1)); + (void)sp_4096_add_128(r + 128, r + 128, z2); +} + +#endif /* !WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Add b to a into r. (r = a + b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r6, %[a]\n\t" + "mov r7, #0\n\t" + "mov r4, #2\n\t" + "lsl r4, r4, #8\n\t" + "sub r7, r7, #1\n\t" + "add r6, r6, r4\n\t" + "\n1:\n\t" + "adds %[c], %[c], r7\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" + "adcs r4, r4, r5\n\t" + "str r4, [%[r]]\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" + "cmp %[a], r6\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into a. (a -= b) + * + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, + const sp_digit* b) +{ + sp_digit c = 0; + __asm__ __volatile__ ( + "mov r7, %[a]\n\t" + "mov r5, #2\n\t" + "lsl r5, r5, #8\n\t" + "add r7, r7, r5\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r6, [%[b], #4]\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" + "str r3, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" + "cmp %[a], r7\n\t" + "bne 1b\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7" + ); + + return c; +} + +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL +/* Multiply a and b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit tmp[128 * 2]; + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r9, %[a]\n\t" + "mov r10, %[b]\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r9\n\t" + "mov r12, r6\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #252\n\t" + "mov %[a], r8\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov %[b], r8\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n2:\n\t" + /* Multiply Start */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" + "umull r6, r7, r6, r7\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" + "cmp %[a], r12\n\t" + "beq 3f\n\t" + "mov r6, r8\n\t" + "add r6, r6, r9\n\t" + "cmp %[a], r6\n\t" + "ble 2b\n\t" + 
"\n3:\n\t" + "mov %[r], r11\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add r7, r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[a], r9\n\t" + "mov %[b], r10\n\t" + : + : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + + XMEMCPY(r, tmp, sizeof(tmp)); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. + */ +SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +{ + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "mov r5, #0\n\t" + "mov r8, r3\n\t" + "mov r11, %[r]\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "neg r6, r6\n\t" + "add sp, sp, r6\n\t" + "mov r10, sp\n\t" + "mov r9, %[a]\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #252\n\t" + "mov %[a], r8\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" + "mvn r6, r6\n\t" + "and %[a], %[a], r6\n\t" + "mov r2, r8\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n2:\n\t" + "cmp r2, %[a]\n\t" + "beq 4f\n\t" + /* Multiply * 2: Start */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [r2]\n\t" + "umull r6, r7, r6, r7\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ + "bal 5f\n\t" + "\n4:\n\t" + /* Square: Start */ + "ldr r6, [%[a]]\n\t" + "umull r6, r7, r6, r6\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ + "\n5:\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, r9\n\t" + "cmp %[a], r6\n\t" + "beq 3f\n\t" + "cmp %[a], r2\n\t" + "bgt 3f\n\t" + "mov r7, r8\n\t" + 
"add r7, r7, r9\n\t" + "cmp %[a], r7\n\t" + "ble 2b\n\t" + "\n3:\n\t" + "mov %[r], r10\n\t" + "mov r7, r8\n\t" + "str r3, [%[r], r7]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r7, r7, #4\n\t" + "mov r8, r7\n\t" + "mov r6, #3\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #248\n\t" + "cmp r7, r6\n\t" + "ble 1b\n\t" + "mov %[a], r9\n\t" + "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov %[a], r10\n\t" + "mov r3, #3\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #252\n\t" + "\n4:\n\t" + "ldr r6, [%[a], r3]\n\t" + "str r6, [%[r], r3]\n\t" + "subs r3, r3, #4\n\t" + "bge 4b\n\t" + "mov r6, #4\n\t" + "lsl r6, r6, #8\n\t" + "add sp, sp, r6\n\t" + : + : [r] "r" (r), [a] "r" (a) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* WOLFSSL_SP_SMALL */ +/* Caclulate the bottom digit of -1/a mod 2^n. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */ +SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, + sp_digit b) +{ + __asm__ __volatile__ ( + "mov r6, #2\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, %[a]\n\t" + "mov r8, %[r]\n\t" + "mov r9, r6\n\t" + "mov r3, #0\n\t" + "mov r4, #0\n\t" + "\n1:\n\t" + "mov %[r], #0\n\t" + "mov r5, #0\n\t" + /* A[] * B */ + "ldr r6, [%[a]]\n\t" + "umull r6, r7, r6, %[b]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ + "mov %[r], r8\n\t" + "str r3, [%[r]]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" + "mov r8, %[r]\n\t" + "cmp %[a], r9\n\t" + "blt 1b\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + ); +} + +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A signle precision number. + */ +static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 128); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_128(r, m); +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #2\n\t" + "lsl r5, r5, #8\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, r6, %[m]\n\t" + "mov r5, #0\n\t" + "subs r5, r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_digit ca = 0; + + __asm__ __volatile__ ( + "mov r8, %[mp]\n\t" + "mov r12, %[ca]\n\t" + "mov r14, %[m]\n\t" + "mov r9, %[a]\n\t" + "mov r4, #0\n\t" + /* i = 0 */ + "mov r11, r4\n\t" + "\n1:\n\t" + "mov r5, #0\n\t" + "mov %[ca], #0\n\t" + /* mu = a[i] * mp */ + "mov %[mp], r8\n\t" + "ldr %[a], [%[a]]\n\t" + "mul %[mp], %[mp], %[a]\n\t" + "mov %[m], r14\n\t" + "mov r10, r9\n\t" + "\n2:\n\t" + /* a[i+j] += m[j] * mu */ + "mov %[a], r10\n\t" + "ldr %[a], [%[a]]\n\t" + "mov %[ca], #0\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]]\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, %[ca]\n\t" + "mov %[a], r10\n\t" + "str r4, [%[a]]\n\t" + "mov r6, #4\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" + "mov r4, #1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #252\n\t" + "add r4, r4, r9\n\t" + "cmp r10, r4\n\t" 
+ "blt 2b\n\t" + /* a[i+127] += m[127] * mu */ + "mov %[ca], #0\n\t" + "mov r4, r12\n\t" + "mov %[a], #0\n\t" + /* Multiply m[127] and mu - Start */ + "ldr r7, [%[m]]\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[127] and mu - Done */ + "mov %[ca], %[a]\n\t" + "mov %[a], r10\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr %[a], [%[a]]\n\t" + "mov r6, #0\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" + "mov %[a], r10\n\t" + "str r5, [%[a]]\n\t" + "str r7, [%[a], #4]\n\t" + /* i += 1 */ + "mov r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" + "mov r12, %[ca]\n\t" + "mov %[a], r9\n\t" + "mov r4, #2\n\t" + "lsl r4, r4, #8\n\t" + "cmp r11, r4\n\t" + "blt 1b\n\t" + "mov %[m], r14\n\t" + : [ca] "+r" (ca), [a] "+r" (a) + : [m] "r" (m), [mp] "r" (mp) + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + ); + + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); +} + +/* Multiply two Montogmery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_128(r, a, b); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_128(r, a); + sp_4096_mont_reduce_128(r, m, mp); +} + +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. 
+ * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, + sp_digit div) +{ + sp_digit r = 0; + + __asm__ __volatile__ ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" + : [r] "+r" (r) + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) + : "r4", "r5", "r6", "r7" + ); + return r; +} + +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. + */ +static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<128; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 128; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +/* Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. 
+ * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) +{ + sp_digit r = 0; + + + __asm__ __volatile__ ( + "mov r3, #0\n\t" + "mvn r3, r3\n\t" + "mov r6, #1\n\t" + "lsl r6, r6, #8\n\t" + "add r6, r6, #252\n\t" + "\n1:\n\t" + "ldr r7, [%[a], r6]\n\t" + "ldr r5, [%[b], r6]\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" + "mov r4, r7\n\t" + "subs r7, r7, r5\n\t" + "sbc r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" + "mvn r7, r7\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" + "cmp r6, #0\n\t" + "bge 1b\n\t" + : [r] "+r" (r) + : [a] "r" (a), [b] "r" (b) + : "r3", "r4", "r5", "r6", "r7" + ); + + return r; +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + sp_4096_mask_128(t2, d, t1[128 + i]); + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2); + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. 
+ * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128(a, m, NULL, r); +} + +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * + * a Nmber to be divided. + * d Number to divide with. + * m Multiplier result. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[256], t2[129]; + sp_digit div, r1; + int i; + + (void)m; + + div = d[127]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 128); + for (i=127; i>=0; i--) { + r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div); + + sp_4096_mul_d_128(t2, d, r1); + t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2); + t1[128 + i] -= t2[128]; + if (t1[128 + i] != 0) { + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + if (t1[128 + i] != 0) + t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_128(t1, d) >= 0; + sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_128_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \ + defined(WOLFSSL_HAVE_SP_DH) +#ifdef WOLFSSL_SP_SMALL +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. 
+ * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[16][256]; +#else + sp_digit* t[16]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<16; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + + i = 
(bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 4; + if (c == 32) { + c = 28; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=4; ) { + if (c == 0) { + n = e[i--]; + y = n >> 28; + n <<= 4; + c = 28; + } + else if (c < 4) { + y = n >> 28; + n = e[i--]; + c = 4 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 28) & 0xf; + n <<= 4; + c -= 4; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#else +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][256]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) { + t[i] = td + i * 256; + } +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 128U); + if (reduceA != 0) { + err = sp_4096_mod_128(t[1] + 128, a, m); + if (err == MP_OKAY) { + err = sp_4096_mod_128(t[1], t[1], m); + } + } + else { + XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128); + err = sp_4096_mod_128(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_128(t[10], t[ 5], m, mp); + sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_128(t[12], t[ 6], m, mp); + sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_128(t[14], t[ 7], m, mp); + sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_128(t[16], t[ 8], m, mp); + sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_128(t[18], t[ 9], m, mp); + sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_128(t[20], t[10], m, mp); + sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp); + 
sp_4096_mont_sqr_128(t[22], t[11], m, mp); + sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_128(t[24], t[12], m, mp); + sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_128(t[26], t[13], m, mp); + sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_128(t[28], t[14], m, mp); + sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_128(t[30], t[15], m, mp); + sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 32; + n = e[i--]; + c = bits & 31; + if (c == 0) { + c = 32; + } + c -= bits % 5; + if (c == 32) { + c = 27; + } + y = (int)(n >> c); + n <<= 32 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 128); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + + sp_4096_mont_mul_128(r, r, t[y], m, mp); + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* WOLFSSL_SP_SMALL */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. 
+ * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[256], md[128], rd[256]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e[1]; + int err = MP_OKAY; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 128 * 2; + m = r + 128 * 2; + ah = a + 128; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 128; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(ah, 128, in, inLen); +#if DIGIT_BIT >= 32 + e[0] = em->dp[0]; +#else + e[0] = em->dp[0]; + if (em->used > 1) + e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e[0] == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 128, mm); + + if (e[0] == 0x3) { + if (err == MP_OKAY) { + sp_4096_sqr_128(r, ah); + err = sp_4096_mod_128_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_128(r, ah, r); + err = sp_4096_mod_128_cond(r, r, m); + } + } + else { + int i; + sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. 
*/ + XMEMSET(a, 0, sizeof(sp_digit) * 128); + err = sp_4096_mod_128_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=31; i>=0; i--) + if (e[0] >> i) + break; + + XMEMCPY(r, a, sizeof(sp_digit) * 128); + for (i--; i>=0; i--) { + sp_4096_mont_sqr_128(r, r, m, mp); + if (((e[0] >> i) & 1) == 1) + sp_4096_mont_mul_128(r, r, a, m, mp); + } + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128); + sp_4096_mont_reduce_128(r, m, mp); + + for (i = 127; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_128(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[128 * 2]; + sp_digit pd[64], qd[64], dpd[64]; + sp_digit tmpad[128], tmpbd[128]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 128 * 2; + q = p + 64; + qi = dq = dp = q + 64; + tmpa = qi + 64; + tmpb = tmpa + 128; + + tmp = t; + r = tmp + 128; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; + tmp = a + 128; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 128, in, inLen); + sp_4096_from_mp(p, 64, pm); + sp_4096_from_mp(q, 64, qm); + sp_4096_from_mp(dp, 64, dpm); + + err = sp_4096_mod_exp_64(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 64, dqm); + err = sp_4096_mod_exp_64(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_4096_sub_in_place_64(tmpa, tmpb); + sp_4096_mask_64(tmp, p, c); + sp_4096_add_64(tmpa, tmpa, tmp); + + sp_4096_from_mp(qi, 64, qim); + sp_4096_mul_64(tmpa, tmpa, qi); + err = sp_4096_mod_64(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { + sp_4096_mul_64(tmpa, q, tmpa); + XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64); + sp_4096_add_128(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. + */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 32 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 128); + r->used = 128; + mp_clamp(r); +#elif DIGIT_BIT < 32 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 32) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 32 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 128; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 32 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 32 - s; + } + else { + s += 32; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. 
+ * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expBits > 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_mp(e, 128, exp); + sp_4096_from_mp(m, 128, mod); + + err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH + +#ifdef HAVE_FFDHE_4096 +static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +{ + __asm__ __volatile__ ( + "mov r6, #31\n\t" + "sub r6, r6, %[n]\n\t" + "add %[a], %[a], #448\n\t" + "add %[r], %[r], #448\n\t" + "ldr r3, [%[a], #60]\n\t" + "lsr r4, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r4, r4, r6\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, 
r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr 
r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + 
"lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + 
"lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, 
%[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, 
r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #56]\n\t" + "str r2, [%[r], #64]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #44]\n\t" + "str r2, [%[r], #52]\n\t" + "lsr r5, r3, #1\n\t" + "lsl 
r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #32]\n\t" + "str r2, [%[r], #40]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #20]\n\t" + "str r2, [%[r], #28]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #8]\n\t" + "str r2, [%[r], #16]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #0]\n\t" + "str r3, [%[r], #8]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r3, [%[a], #60]\n\t" + "str r2, [%[r], #68]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r5, r2, #1\n\t" + 
"lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #48]\n\t" + "str r2, [%[r], #56]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #36]\n\t" + "str r2, [%[r], #44]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #24]\n\t" + "str r2, [%[r], #32]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #12]\n\t" + "str r2, [%[r], #20]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + 
"orr r2, r2, r5\n\t" + "ldr r3, [%[a], #0]\n\t" + "str r2, [%[r], #8]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "sub %[a], %[a], #64\n\t" + "sub %[r], %[r], #64\n\t" + "ldr r2, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #52]\n\t" + "str r2, [%[r], #60]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #40]\n\t" + "str r2, [%[r], #48]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #28]\n\t" + "str r2, [%[r], #36]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #16]\n\t" + "str r2, [%[r], #24]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, 
r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" + "lsr r5, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r2, r2, r5\n\t" + "ldr r3, [%[a], #4]\n\t" + "str r2, [%[r], #12]\n\t" + "lsr r5, r3, #1\n\t" + "lsl r3, r3, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r4, r4, r5\n\t" + "ldr r2, [%[a], #0]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r5, r2, #1\n\t" + "lsl r2, r2, %[n]\n\t" + "lsr r5, r5, r6\n\t" + "orr r3, r3, r5\n\t" + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) + : "memory", "r2", "r3", "r4", "r5", "r6" + ); +} + +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[256]; + sp_digit td[129]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL, + DYNAMIC_TYPE_TMP_BUFFER); /* 385 digits = 256 for norm followed by 129 for tmp */ + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 256; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_128(norm, m); + + i = (bits - 1) / 32; /* index of the most significant exponent word */ + n = e[i--]; + c = bits & 31; /* number of exponent bits held in the top word (0 means all 32) */ + if (c == 0) { + c = 32; + } + c -= bits % 5; /* the first window is short so that the remaining bit count is a multiple of 5 */ + if (c == 32) { + c = 27; /* full top word and bits % 5 == 0: take a whole 5-bit window, keeps the shift below 32 */ + } + y = (int)(n >> c); + n <<= 32 - c; + sp_4096_lshift_128(r, norm, y); /* presumably r = norm << y, i.e. 2^y in Montgomery form - confirm against sp_4096_lshift_128 */ + for (; i>=0 || c>=5; ) { /* each pass consumes one 5-bit window y of the exponent */ + if (c == 0) { + n = e[i--]; + y = n >> 27; + n <<= 5; + c = 27; + } + else if (c < 5) { /* window straddles two exponent words */ + y = n >> 27; + n = e[i--]; + c = 5 - c; + y |= n >> (32 - c); + n <<= c; + c = 32 - c; + } + else { + y = (n >> 27) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); + sp_4096_mont_sqr_128(r, r, m, mp); /* five squarings, one per bit of the window */ + + sp_4096_lshift_128(r, r, y); /* the base is 2, so the multiply by 2^y is done as a shift */ + sp_4096_mul_d_128(tmp, norm, r[128]); /* overflow word r[128] is folded back in through norm */ + r[128] = 0; + o = sp_4096_add_128(r, r, tmp); + sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o); /* subtract m only when the add returned non-zero */ + } + + XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U); + sp_4096_mont_reduce_128(r, m, mp); + + mask = 0 - (sp_4096_cmp_128(r, m) >= 0); /* all ones when r >= m */ + sp_4096_cond_sub_128(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) { + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } +#endif + + return err; +} +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. 
+ * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returns 0 on success, MP_READ_E if base is longer than 4096 bits, expLen is + * greater than 512 or mod is not exactly 4096 bits long, + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[256], e[128], m[128]; + sp_digit* r = b; /* the result is written over the base buffer */ + word32 i; + + if (mp_count_bits(base) > 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + if (expLen > 512) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + if (mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 128, base); + sp_4096_from_bin(e, 128, exp, expLen); + sp_4096_from_mp(m, 128, mod); /* when base is 2 and the top modulus digit is all ones the base-2 routine is used below */ + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1) + err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m); + else + #endif + err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0); + + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { /* count the leading zero bytes */ + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); /* drop the leading zero bytes from the output */ + + } + + XMEMSET(e, 0, sizeof(e)); /* clear the local copy of the exponent */ + + return err; +} +#endif /* WOLFSSL_HAVE_SP_DH */ + +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 9473b4332..05f649f7e 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -1657,17 +1657,22 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, } else #endif + { sp_2048_mul_16(tmpa, tmpa, qi); + } err = sp_2048_mod_16(tmpa, tmpa, p); } if (err == MP_OKAY) { #ifdef 
HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { sp_2048_mul_avx2_16(tmpa, q, tmpa); + } else #endif + { sp_2048_mul_16(tmpa, q, tmpa); + } XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16); sp_2048_add_32(r, tmpb, tmpa); @@ -3748,17 +3753,22 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, } else #endif + { sp_3072_mul_24(tmpa, tmpa, qi); + } err = sp_3072_mod_24(tmpa, tmpa, p); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { sp_3072_mul_avx2_24(tmpa, q, tmpa); + } else #endif + { sp_3072_mul_24(tmpa, q, tmpa); + } XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24); sp_3072_add_48(r, tmpb, tmpa); @@ -4230,6 +4240,1566 @@ int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif /* !WOLFSSL_SP_NO_3072 */ +#ifdef WOLFSSL_SP_4096 +/* Read big endian unsigned byte array into r. + * + * r A single precision integer. + * size Number of digits in r; digits not filled from a are set to zero. + * a Byte array. + * n Number of bytes in array to read. + */ +static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n) +{ + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = n-1; i >= 0; i--) { /* last byte of a is the least significant */ + r[j] |= (((sp_digit)a[i]) << s); + if (s >= 56U) { /* digit j is now full: move on to the next digit */ + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + r[++j] = (sp_digit)a[i] >> s; + s = 8U - s; + } + else { + s += 8U; + } + } + + for (j++; j < size; j++) { /* zero the digits above the last one written */ + r[j] = 0; + } +} + +/* Convert an mp_int to an array of sp_digit. + * + * r A single precision integer. + * size Number of digits in r; digits not filled from a are set to zero. + * a A multi-precision integer. 
+ */ +static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a) +{ +#if DIGIT_BIT == 64 + int j; + + XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); /* NOTE(review): copies a->used digits without checking against size - callers presumably bound a first, confirm */ + + for (j = a->used; j < size; j++) { + r[j] = 0; + } +#elif DIGIT_BIT > 64 + int i, j = 0; + word32 s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { /* each mp digit is split over several 64-bit digits */ + r[j] |= ((sp_digit)a->dp[i] << s); + r[j] &= 0xffffffffffffffffl; + s = 64U - s; + if (j + 1 >= size) { + break; + } + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + while ((s + 64U) <= (word32)DIGIT_BIT) { + s += 64U; + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + if (s < (word32)DIGIT_BIT) { + /* lint allow cast of mismatch word32 and mp_digit */ + r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/ + } + else { + r[++j] = 0L; + } + } + s = (word32)DIGIT_BIT - s; + } + + for (j++; j < size; j++) { + r[j] = 0; + } +#else + int i, j = 0, s = 0; + + r[0] = 0; + for (i = 0; i < a->used && j < size; i++) { /* mp digits are packed into 64-bit digits, s is the bit offset in r[j] */ + r[j] |= ((sp_digit)a->dp[i]) << s; + if (s + DIGIT_BIT >= 64) { + r[j] &= 0xffffffffffffffffl; + if (j + 1 >= size) { + break; + } + s = 64 - s; + if (s == DIGIT_BIT) { + r[++j] = 0; + s = 0; + } + else { + r[++j] = a->dp[i] >> s; + s = DIGIT_BIT - s; + } + } + else { + s += DIGIT_BIT; + } + } + + for (j++; j < size; j++) { /* zero the digits above the last one written */ + r[j] = 0; + } +#endif +} + +/* Write r as big endian to byte array. + * Fixed length number of bytes written: 512 + * + * r A single precision integer. + * a Byte array. 
+ */ +static void sp_4096_to_bin(sp_digit* r, byte* a) +{ + int i, j, s = 0, b; + + j = 4096 / 8 - 1; /* index of the last output byte; r[0] is written there first */ + a[j] = 0; + for (i=0; i<64 && j>=0; i++) { + b = 0; + /* lint allow cast of mismatch sp_digit and int */ + a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/ + if (j < 0) { + break; + } + while (b < 64) { + a[j--] = r[i] >> b; b += 8; + if (j < 0) { + break; + } + } + s = 8 - (b - 64); + if (j >= 0) { + a[j] = 0; + } + if (s != 0) { + j++; + } + } +} + +extern sp_digit sp_4096_sub_in_place_64(sp_digit* a, const sp_digit* b); +extern sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b); +/* Multiply a and b into r. (r = a * b) + * One level of Karatsuba over the 32-digit multiply. + * + * r A single precision integer. 128 digits are written. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); /* a1 = low half of a + high half of a */ + cb = sp_2048_add_32(b1, b, &b[32]); /* b1 = low half of b + high half of b */ + u = ca & cb; + sp_2048_mul_32(z1, a1, b1); /* z1 = a1 * b1 */ + sp_2048_mul_32(z2, &a[32], &b[32]); /* z2 = high half of a * high half of b */ + sp_2048_mul_32(z0, a, b); /* z0 = low half of a * low half of b, written straight into r */ + sp_2048_mask_32(r + 64, a1, 0 - cb); /* the values returned by the two adds select the extra terms of z1 */ + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); /* z1 is now the middle term */ + u += sp_4096_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + sp_4096_add_64(r + 64, r + 64, z2); +} + +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); /* a1 = low half of a + high half of a */ + sp_2048_sqr_32(z1, a1); /* z1 = a1 squared */ + sp_2048_sqr_32(z2, &a[32]); /* z2 = high half squared */ + sp_2048_sqr_32(z0, a); /* z0 = low half squared, written straight into r */ + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); /* z1 is now the middle term */ + u += sp_4096_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + sp_4096_add_64(r + 64, r + 64, z2); +} + +#ifdef HAVE_INTEL_AVX2 +/* Multiply a and b into r. (r = a * b) + * Same steps as sp_4096_mul_64 but using the AVX2 32-digit multiply. + * + * r A single precision integer. 128 digits are written. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static void sp_4096_mul_avx2_64(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + sp_digit* z0 = r; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit b1[32]; + sp_digit z2[64]; + sp_digit u, ca, cb; + + ca = sp_2048_add_32(a1, a, &a[32]); /* a1 = low half of a + high half of a */ + cb = sp_2048_add_32(b1, b, &b[32]); /* b1 = low half of b + high half of b */ + u = ca & cb; + sp_2048_mul_avx2_32(z1, a1, b1); /* z1 = a1 * b1 */ + sp_2048_mul_avx2_32(z2, &a[32], &b[32]); /* z2 = high half of a * high half of b */ + sp_2048_mul_avx2_32(z0, a, b); /* z0 = low half of a * low half of b, written straight into r */ + sp_2048_mask_32(r + 64, a1, 0 - cb); + sp_2048_mask_32(b1, b1, 0 - ca); + u += sp_2048_add_32(r + 64, r + 64, b1); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); /* z1 is now the middle term */ + u += sp_4096_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + sp_4096_add_64(r + 64, r + 64, z2); +} +#endif /* HAVE_INTEL_AVX2 */ + +#ifdef HAVE_INTEL_AVX2 +/* Square a and put result in r. (r = a * a) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +SP_NOINLINE static void sp_4096_sqr_avx2_64(sp_digit* r, const sp_digit* a) +{ + sp_digit* z0 = r; + sp_digit z2[64]; + sp_digit z1[64]; + sp_digit a1[32]; + sp_digit u; + + u = sp_2048_add_32(a1, a, &a[32]); /* a1 = low half of a + high half of a */ + sp_2048_sqr_avx2_32(z1, a1); /* z1 = a1 squared */ + sp_2048_sqr_avx2_32(z2, &a[32]); /* z2 = high half squared */ + sp_2048_sqr_avx2_32(z0, a); /* z0 = low half squared, written straight into r */ + sp_2048_mask_32(r + 64, a1, 0 - u); + u += sp_2048_add_32(r + 64, r + 64, r + 64); + u += sp_4096_sub_in_place_64(z1, z2); + u += sp_4096_sub_in_place_64(z1, z0); /* z1 is now the middle term */ + u += sp_4096_add_64(r + 32, r + 32, z1); + r[96] = u; + XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); + sp_4096_add_64(r + 64, r + 64, z2); +} +#endif /* HAVE_INTEL_AVX2 */ + +/* Calculate the bottom digit of -1/a mod 2^n. + * Only a[0] is read; each multiply step doubles the number of correct bits. + * + * a A single precision number. + * rho Bottom word of inverse. + */ +static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) +{ + sp_digit x, b; + + b = a[0]; + x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - b * x; /* here x*a==1 mod 2**8 */ + x *= 2 - b * x; /* here x*a==1 mod 2**16 */ + x *= 2 - b * x; /* here x*a==1 mod 2**32 */ + x *= 2 - b * x; /* here x*a==1 mod 2**64 */ + + /* rho = -1/m mod b */ + *rho = -x; +} + +extern void sp_4096_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b); +#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) +/* r = 2^n mod m where n is the number of bits to reduce by. + * Given m must be 4096 bits, just need to subtract. + * + * r A single precision number. + * m A single precision number. + */ +static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) +{ + XMEMSET(r, 0, sizeof(sp_digit) * 64); + + /* r = 2^n mod m */ + sp_4096_sub_in_place_64(r, m); /* 0 - m over 64 digits */ +} + +#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ +extern sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m); +extern void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp); +/* Multiply two Montgomery form numbers mod the modulus (prime). 
+ * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montogmery form. + * b Second number to multiply in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_64(r, a, b); + sp_4096_mont_reduce_64(r, m, mp); +} + +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montogmery form. + * m Modulus (prime). + * mp Montogmery mulitplier. + */ +static void sp_4096_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_64(r, a); + sp_4096_mont_reduce_64(r, m, mp); +} + +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +extern void sp_4096_mul_d_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit b); +/* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The dividend. + * returns the result of the division. + */ +static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, + sp_digit div) +{ + register sp_digit r asm("rax"); + __asm__ __volatile__ ( + "divq %3" + : "=a" (r) + : "d" (d1), "a" (d0), "r" (div) + : + ); + return r; +} +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) { + r[i] = a[i] & m; + } +#else + int i; + + for (i = 0; i < 64; i += 8) { /* same loop as above, unrolled eight digits at a time */ + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + +extern int64_t sp_4096_cmp_64(const sp_digit* a, const sp_digit* b); +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * The quotient digits are estimated from the top word of d only, so d is + * expected to have its top bit set (a full 4096-bit number). + * + * a Number to be divided. 128 digits are read. + * d Number to divide with. + * m Multiplier result. Not used. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1, eq; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + /* Bring the top half below d first so that the first quotient digit fits + * in one word; the remainder modulo d is not changed by this. */ + r1 = sp_4096_cmp_64(&t1[64], d) >= 0; + sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1); + for (i=63; i>=0; i--) { + /* The word divide faults when the high word equals the divisor word. + * In that case divide with div - 1 as the high word and saturate the + * estimate to all ones. */ + eq = (sp_digit)0 - (sp_digit)(t1[64 + i] == div); + r1 = div_4096_word_64(t1[64 + i] + eq, t1[64 + i - 1], div); + r1 |= eq; + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_4096_mul_d_avx2_64(t2, d, r1); + else +#endif + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + /* The estimate can be too large: add d back, masked by the top word, + * at most twice and without branching. */ + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + sp_4096_mask_64(t2, d, t1[64 + i]); + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], t2); + } + + r1 = sp_4096_cmp_64(t1, d) >= 0; + sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. 
+ */ +static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64(a, m, NULL, r); +} + +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ +/* Divide d in a and put remainder into r (m*d + r = a) + * m is not calculated as it is not needed at this time. + * Unlike sp_4096_div_64 the correction step branches on the data. + * The quotient digits are estimated from the top word of d only, so d is + * expected to have its top bit set (a full 4096-bit number). + * + * a Number to be divided. 128 digits are read. + * d Number to divide with. + * m Multiplier result. Not used. + * r Remainder from the division. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, + sp_digit* r) +{ + sp_digit t1[128], t2[65]; + sp_digit div, r1, eq; + int i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)m; + + div = d[63]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 64); + /* Bring the top half below d first so that the first quotient digit fits + * in one word; the remainder modulo d is not changed by this. */ + r1 = sp_4096_cmp_64(&t1[64], d) >= 0; + sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1); + for (i=63; i>=0; i--) { + /* The word divide faults when the high word equals the divisor word. + * In that case divide with div - 1 as the high word and saturate the + * estimate to all ones. */ + eq = (sp_digit)0 - (sp_digit)(t1[64 + i] == div); + r1 = div_4096_word_64(t1[64 + i] + eq, t1[64 + i - 1], div); + r1 |= eq; + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + sp_4096_mul_d_avx2_64(t2, d, r1); + else +#endif + sp_4096_mul_d_64(t2, d, r1); + t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2); + t1[64 + i] -= t2[64]; + /* The estimate can be too large: add d back at most twice. */ + if (t1[64 + i] != 0) { + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + if (t1[64 + i] != 0) + t1[64 + i] += sp_4096_add_64(&t1[i], &t1[i], d); + } + } + + r1 = sp_4096_cmp_64(t1, d) >= 0; + sp_4096_cond_sub_64(r, t1, d, (sp_digit)0 - r1); + + return MP_OKAY; +} + +/* Reduce a modulo m into r. (r = a mod m) + * + * r A single precision number that is the reduced result. + * a A single precision number that is to be reduced. + * m A single precision number that is the modulus to reduce with. + * returns MP_OKAY indicating success. + */ +static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +{ + return sp_4096_div_64_cond(a, m, NULL, r); +} + +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +/* Modular exponentiate a to the e mod m. 
(r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 128; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64); + if (reduceA) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) + err = sp_4096_mod_64(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_64(t[14], t[ 7], m, mp); + 
sp_4096_mont_mul_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_64(t[20], t[10], m, mp); + sp_4096_mont_mul_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_64(t[22], t[11], m, mp); + sp_4096_mont_mul_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_64(t[24], t[12], m, mp); + sp_4096_mont_mul_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_64(t[26], t[13], m, mp); + sp_4096_mont_mul_64(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_64(t[28], t[14], m, mp); + sp_4096_mont_mul_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_64(t[30], t[15], m, mp); + sp_4096_mont_mul_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) + c = 64; + c -= bits % 5; + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else if (c < 5) { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_mont_mul_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +extern void sp_4096_mont_reduce_avx2_64(sp_digit* a, const sp_digit* m, sp_digit mp); +#ifdef HAVE_INTEL_AVX2 +/* Multiply 
two Montgomery form numbers mod the modulus (prime). + * (r = a * b mod m) + * + * r Result of multiplication. + * a First number to multiply in Montgomery form. + * b Second number to multiply in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + const sp_digit* m, sp_digit mp) +{ + sp_4096_mul_avx2_64(r, a, b); /* double-width product in r */ + sp_4096_mont_reduce_avx2_64(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#ifdef HAVE_INTEL_AVX2 +/* Square the Montgomery form number. (r = a * a mod m) + * + * r Result of squaring. + * a Number to square in Montgomery form. + * m Modulus (prime). + * mp Montgomery multiplier. + */ +static void sp_4096_mont_sqr_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* m, + sp_digit mp) +{ + sp_4096_sqr_avx2_64(r, a); /* double-width square in r */ + sp_4096_mont_reduce_avx2_64(r, m, mp); +} + +#endif /* HAVE_INTEL_AVX2 */ +#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate a to the e mod m. (r = a^e mod m) + * + * r A single precision number that is the result of the operation. + * a A single precision number being exponentiated. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * reduceA Non-zero when a is to be reduced modulo m before use. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* e, + int bits, const sp_digit* m, int reduceA) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit t[32][128]; +#else + sp_digit* t[32]; + sp_digit* td; +#endif + sp_digit* norm; + sp_digit mp = 1; + sp_digit n; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + for (i=0; i<32; i++) + t[i] = td + i * 128; +#endif + norm = t[0]; + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + XMEMSET(t[1], 0, sizeof(sp_digit) * 64); + if (reduceA) { + err = sp_4096_mod_64(t[1] + 64, a, m); + if (err == MP_OKAY) + err = sp_4096_mod_64(t[1], t[1], m); + } + else { + XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64); + err = sp_4096_mod_64(t[1], t[1], m); + } + } + + if (err == MP_OKAY) { + sp_4096_mont_sqr_avx2_64(t[ 2], t[ 1], m, mp); + sp_4096_mont_mul_avx2_64(t[ 3], t[ 2], t[ 1], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 4], t[ 2], m, mp); + sp_4096_mont_mul_avx2_64(t[ 5], t[ 3], t[ 2], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 6], t[ 3], m, mp); + sp_4096_mont_mul_avx2_64(t[ 7], t[ 4], t[ 3], m, mp); + sp_4096_mont_sqr_avx2_64(t[ 8], t[ 4], m, mp); + sp_4096_mont_mul_avx2_64(t[ 9], t[ 5], t[ 4], m, mp); + sp_4096_mont_sqr_avx2_64(t[10], t[ 5], m, mp); + sp_4096_mont_mul_avx2_64(t[11], t[ 6], t[ 5], m, mp); + sp_4096_mont_sqr_avx2_64(t[12], t[ 6], m, mp); + sp_4096_mont_mul_avx2_64(t[13], t[ 7], t[ 6], m, mp); + sp_4096_mont_sqr_avx2_64(t[14], t[ 7], m, mp); + sp_4096_mont_mul_avx2_64(t[15], t[ 8], t[ 7], m, mp); + sp_4096_mont_sqr_avx2_64(t[16], t[ 8], m, mp); + sp_4096_mont_mul_avx2_64(t[17], t[ 9], t[ 8], m, mp); + sp_4096_mont_sqr_avx2_64(t[18], t[ 9], m, mp); + sp_4096_mont_mul_avx2_64(t[19], t[10], t[ 9], m, mp); + sp_4096_mont_sqr_avx2_64(t[20], t[10], m, mp); 
+ sp_4096_mont_mul_avx2_64(t[21], t[11], t[10], m, mp); + sp_4096_mont_sqr_avx2_64(t[22], t[11], m, mp); + sp_4096_mont_mul_avx2_64(t[23], t[12], t[11], m, mp); + sp_4096_mont_sqr_avx2_64(t[24], t[12], m, mp); + sp_4096_mont_mul_avx2_64(t[25], t[13], t[12], m, mp); + sp_4096_mont_sqr_avx2_64(t[26], t[13], m, mp); + sp_4096_mont_mul_avx2_64(t[27], t[14], t[13], m, mp); + sp_4096_mont_sqr_avx2_64(t[28], t[14], m, mp); + sp_4096_mont_mul_avx2_64(t[29], t[15], t[14], m, mp); + sp_4096_mont_sqr_avx2_64(t[30], t[15], m, mp); + sp_4096_mont_mul_avx2_64(t[31], t[16], t[15], m, mp); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) + c = 64; + c -= bits % 5; + y = (int)(n >> c); + n <<= 64 - c; + XMEMCPY(r, t[y], sizeof(sp_digit) * 64); + for (; i>=0 || c>=5; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 59); + n <<= 5; + c = 59; + } + else if (c < 5) { + y = (int)(n >> 59); + n = e[i--]; + c = 5 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 59) & 0x1f; + n <<= 5; + c -= 5; + } + + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + + sp_4096_mont_mul_avx2_64(r, r, t[y], m, mp); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ +#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */ + +#ifdef WOLFSSL_HAVE_SP_RSA +/* RSA public key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * em Public exponent. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. 
+ * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. + */ +int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[128], md[64], rd[128]; +#else + sp_digit* d = NULL; +#endif + sp_digit* a; + sp_digit *ah; + sp_digit* m; + sp_digit* r; + sp_digit e = 0; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (mp_count_bits(em) > 64 || inLen > 512 || + mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL, + DYNAMIC_TYPE_RSA); + if (d == NULL) + err = MEMORY_E; + } + + if (err == MP_OKAY) { + a = d; + r = a + 64 * 2; + m = r + 64 * 2; + ah = a + 64; + } +#else + a = ad; + m = md; + r = rd; + ah = a + 64; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(ah, 64, in, inLen); +#if DIGIT_BIT >= 64 + e = em->dp[0]; +#else + e = em->dp[0]; + if (em->used > 1) + e |= ((sp_digit)em->dp[1]) << DIGIT_BIT; +#endif + if (e == 0) + err = MP_EXPTMOD_E; + } + if (err == MP_OKAY) { + sp_4096_from_mp(m, 64, mm); + + if (e == 0x3) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (err == MP_OKAY) { + sp_4096_sqr_avx2_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_avx2_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + else +#endif + { + if (err == MP_OKAY) { + sp_4096_sqr_64(r, ah); + err = sp_4096_mod_64_cond(r, r, m); + } + if (err == MP_OKAY) { + sp_4096_mul_64(r, ah, r); + err = sp_4096_mod_64_cond(r, r, m); + } + } + } + else { + int i; 
+ sp_digit mp; + + sp_4096_mont_setup(m, &mp); + + /* Convert to Montgomery form. */ + XMEMSET(a, 0, sizeof(sp_digit) * 64); + err = sp_4096_mod_64_cond(a, a, m); + + if (err == MP_OKAY) { + for (i=63; i>=0; i--) { + if (e >> i) { + break; + } + } + + XMEMCPY(r, a, sizeof(sp_digit) * 64); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + for (i--; i>=0; i--) { + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_4096_mont_mul_avx2_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + } + else +#endif + { + for (i--; i>=0; i--) { + sp_4096_mont_sqr_64(r, r, m, mp); + if (((e >> i) & 1) == 1) { + sp_4096_mont_mul_64(r, r, a, m, mp); + } + } + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + } + + for (i = 63; i > 0; i--) { + if (r[i] != m[i]) + break; + } + if (r[i] >= m[i]) + sp_4096_sub_in_place_64(r, m); + } + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (d != NULL) + XFREE(d, NULL, DYNAMIC_TYPE_RSA); +#endif + + return err; +} + +/* RSA private key operation. + * + * in Array of bytes representing the number to exponentiate, base. + * inLen Number of bytes in base. + * dm Private exponent. + * pm First prime. + * qm Second prime. + * dpm First prime's CRT exponent. + * dqm Second prime's CRT exponent. + * qim Inverse of second prime mod p. + * mm Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Number of bytes in result. + * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when + * an array is too long and MEMORY_E when dynamic memory allocation fails. 
+ */ +int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, + mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, + byte* out, word32* outLen) +{ +#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) + sp_digit ad[64 * 2]; + sp_digit pd[32], qd[32], dpd[32]; + sp_digit tmpad[64], tmpbd[64]; +#else + sp_digit* t = NULL; +#endif + sp_digit* a; + sp_digit* p; + sp_digit* q; + sp_digit* dp; + sp_digit* dq; + sp_digit* qi; + sp_digit* tmp; + sp_digit* tmpa; + sp_digit* tmpb; + sp_digit* r; + sp_digit c; + int err = MP_OKAY; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + (void)dm; + (void)mm; + + if (*outLen < 512) + err = MP_TO_E; + if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096)) + err = MP_READ_E; + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (err == MP_OKAY) { + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL, + DYNAMIC_TYPE_RSA); + if (t == NULL) + err = MEMORY_E; + } + if (err == MP_OKAY) { + a = t; + p = a + 64 * 2; + q = p + 32; + qi = dq = dp = q + 32; + tmpa = qi + 32; + tmpb = tmpa + 64; + + tmp = t; + r = tmp + 64; + } +#else + r = a = ad; + p = pd; + q = qd; + qi = dq = dp = dpd; + tmpa = tmpad; + tmpb = tmpbd; + tmp = a + 64; +#endif + + if (err == MP_OKAY) { + sp_4096_from_bin(a, 64, in, inLen); + sp_4096_from_mp(p, 32, pm); + sp_4096_from_mp(q, 32, qm); + sp_4096_from_mp(dp, 32, dpm); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(tmpa, a, dp, 2048, p, 1); + else +#endif + err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1); + } + if (err == MP_OKAY) { + sp_4096_from_mp(dq, 32, dqm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_2048_mod_exp_avx2_32(tmpb, a, dq, 2048, q, 1); + else +#endif + err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1); + } + + if (err == MP_OKAY) { + c = sp_2048_sub_in_place_32(tmpa, 
tmpb); + sp_2048_mask_32(tmp, p, c); + sp_2048_add_32(tmpa, tmpa, tmp); + + sp_2048_from_mp(qi, 32, qim); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_32(tmpa, tmpa, qi); + } + else +#endif + { + sp_2048_mul_32(tmpa, tmpa, qi); + } + err = sp_2048_mod_32(tmpa, tmpa, p); + } + + if (err == MP_OKAY) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + sp_2048_mul_avx2_32(tmpa, q, tmpa); + } + else +#endif + { + sp_2048_mul_32(tmpa, q, tmpa); + } + XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); + sp_4096_add_64(r, tmpb, tmpa); + + sp_4096_to_bin(r, out); + *outLen = 512; + } + +#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) + if (t != NULL) { + XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11); + XFREE(t, NULL, DYNAMIC_TYPE_RSA); + } +#else + XMEMSET(tmpad, 0, sizeof(tmpad)); + XMEMSET(tmpbd, 0, sizeof(tmpbd)); + XMEMSET(pd, 0, sizeof(pd)); + XMEMSET(qd, 0, sizeof(qd)); + XMEMSET(dpd, 0, sizeof(dpd)); +#endif + + return err; +} +#endif /* WOLFSSL_HAVE_SP_RSA */ +#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* Convert an array of sp_digit to an mp_int. + * + * a A single precision integer. + * r A multi-precision integer. 
+ */ +static int sp_4096_to_mp(const sp_digit* a, mp_int* r) +{ + int err; + + err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT); + if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/ +#if DIGIT_BIT == 64 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 64); + r->used = 64; + mp_clamp(r); +#elif DIGIT_BIT < 64 + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= a[i] << s; + r->dp[j] &= (1L << DIGIT_BIT) - 1; + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + while (s + DIGIT_BIT <= 64) { + s += DIGIT_BIT; + r->dp[j++] &= (1L << DIGIT_BIT) - 1; + if (s == SP_WORD_SIZE) { + r->dp[j] = 0; + } + else { + r->dp[j] = a[i] >> s; + } + } + s = 64 - s; + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#else + int i, j = 0, s = 0; + + r->dp[0] = 0; + for (i = 0; i < 64; i++) { + r->dp[j] |= ((mp_digit)a[i]) << s; + if (s + 64 >= DIGIT_BIT) { + #if DIGIT_BIT != 32 && DIGIT_BIT != 64 + r->dp[j] &= (1L << DIGIT_BIT) - 1; + #endif + s = DIGIT_BIT - s; + r->dp[++j] = a[i] >> s; + s = 64 - s; + } + else { + s += 64; + } + } + r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT; + mp_clamp(r); +#endif + } + + return err; +} + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. MP integer. + * exp Exponent. MP integer. + * mod Modulus. MP integer. + * res Result. MP integer. + * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. 
+ */ +int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + int expBits = mp_count_bits(exp); + + if (mp_count_bits(base) > 4096 || expBits > 4096 || + mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_mp(e, 64, exp); + sp_4096_from_mp(m, 64, mod); + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_4096_mod_exp_avx2_64(r, b, e, expBits, m, 0); + else +#endif + err = sp_4096_mod_exp_64(r, b, e, expBits, m, 0); + } + + if (err == MP_OKAY) { + err = sp_4096_to_mp(r, res); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} + +#ifdef WOLFSSL_HAVE_SP_DH +#ifdef HAVE_FFDHE_4096 +extern void sp_4096_lshift_64(sp_digit* r, const sp_digit* a, int n); +#ifdef HAVE_INTEL_AVX2 +/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
+ */ +static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) + c = 64; + c -= bits % 6; + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + sp_4096_mont_sqr_avx2_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_avx2_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_avx2_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} +#endif /* HAVE_INTEL_AVX2 */ + +/* Modular exponentiate 2 to the e mod m. 
(r = 2^e mod m) + * + * r A single precision number that is the result of the operation. + * e A single precision number that is the exponent. + * bits The number of bits in the exponent. + * m A single precision number that is the modulus. + * returns 0 on success and MEMORY_E on dynamic memory allocation failure. + */ +static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, + const sp_digit* m) +{ +#ifndef WOLFSSL_SMALL_STACK + sp_digit nd[128]; + sp_digit td[65]; +#else + sp_digit* td; +#endif + sp_digit* norm; + sp_digit* tmp; + sp_digit mp = 1; + sp_digit n, o; + sp_digit mask; + int i; + int c, y; + int err = MP_OKAY; + +#ifdef WOLFSSL_SMALL_STACK + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (td == NULL) { + err = MEMORY_E; + } +#endif + + if (err == MP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + norm = td; + tmp = td + 128; +#else + norm = nd; + tmp = td; +#endif + + sp_4096_mont_setup(m, &mp); + sp_4096_mont_norm_64(norm, m); + + i = (bits - 1) / 64; + n = e[i--]; + c = bits & 63; + if (c == 0) + c = 64; + c -= bits % 6; + y = (int)(n >> c); + n <<= 64 - c; + sp_4096_lshift_64(r, norm, y); + for (; i>=0 || c>=6; ) { + if (c == 0) { + n = e[i--]; + y = (int)(n >> 58); + n <<= 6; + c = 58; + } + else if (c < 6) { + y = (int)(n >> 58); + n = e[i--]; + c = 6 - c; + y |= n >> (64 - c); + n <<= c; + c = 64 - c; + } + else { + y = (n >> 58) & 0x3f; + n <<= 6; + c -= 6; + } + + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + sp_4096_mont_sqr_64(r, r, m, mp); + + sp_4096_lshift_64(r, r, y); + sp_4096_mul_d_64(tmp, norm, r[64]); + r[64] = 0; + o = sp_4096_add_64(r, r, tmp); + sp_4096_cond_sub_64(r, r, m, (sp_digit)0 - o); + } + + XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); + sp_4096_mont_reduce_64(r, m, mp); + + mask = 0 - (sp_4096_cmp_64(r, m) >= 0); + sp_4096_cond_sub_64(r, r, m, 
mask); + } + +#ifdef WOLFSSL_SMALL_STACK + if (td != NULL) + XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + + return err; +} + +#endif /* HAVE_FFDHE_4096 */ + +/* Perform the modular exponentiation for Diffie-Hellman. + * + * base Base. + * exp Array of bytes that is the exponent. + * expLen Length of data, in bytes, in exponent. + * mod Modulus. + * out Buffer to hold big-endian bytes of exponentiation result. + * Must be at least 512 bytes long. + * outLen Length, in bytes, of exponentiation result. + * returs 0 on success, MP_READ_E if there are too many bytes in an array + * and MEMORY_E if memory allocation fails. + */ +int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen) +{ + int err = MP_OKAY; + sp_digit b[128], e[64], m[64]; + sp_digit* r = b; + word32 i; +#ifdef HAVE_INTEL_AVX2 + word32 cpuid_flags = cpuid_get_flags(); +#endif + + if (mp_count_bits(base) > 4096 || expLen > 512 || + mp_count_bits(mod) != 4096) { + err = MP_READ_E; + } + + if (err == MP_OKAY) { + sp_4096_from_mp(b, 64, base); + sp_4096_from_bin(e, 64, exp, expLen); + sp_4096_from_mp(m, 64, mod); + + #ifdef HAVE_FFDHE_4096 + if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1) { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_4096_mod_exp_2_avx2_64(r, e, expLen * 8, m); + else +#endif + err = sp_4096_mod_exp_2_64(r, e, expLen * 8, m); + } + else + #endif + { +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) + err = sp_4096_mod_exp_avx2_64(r, b, e, expLen * 8, m, 0); + else +#endif + err = sp_4096_mod_exp_64(r, b, e, expLen * 8, m, 0); + } + } + + if (err == MP_OKAY) { + sp_4096_to_bin(r, out); + *outLen = 512; + for (i=0; i<512 && out[i] == 0; i++) { + } + *outLen -= i; + XMEMMOVE(out, out + i, *outLen); + } + + XMEMSET(e, 0, sizeof(e)); + + return err; +} +#endif +#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && 
!WOLFSSL_RSA_PUBLIC_ONLY) */ + +#endif /* WOLFSSL_SP_4096 */ + #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */ #ifdef WOLFSSL_HAVE_SP_ECC #ifndef WOLFSSL_SP_NO_256 diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 26249845b..57248a53f 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -20,6 +20,7 @@ */ #define HAVE_INTEL_AVX2 +#ifndef WOLFSSL_SP_NO_2048 /* Multiply a and b into r. (r = a * b) * * r A single precision integer. @@ -8591,6 +8592,8 @@ _sp_2048_lshift_32: movq %r8, 16(%rdi) movq %r11, 24(%rdi) repz retq +#endif /* !WOLFSSL_SP_NO_2048 */ +#ifndef WOLFSSL_SP_NO_3072 /* Multiply a and b into r. (r = a * b) * * r A single precision integer. @@ -24612,6 +24615,3797 @@ _sp_3072_lshift_48: movq %r8, 16(%rdi) movq %r11, 24(%rdi) repz retq +#endif /* !WOLFSSL_SP_NO_3072 */
+#ifdef WOLFSSL_SP_4096
+/* Sub b from a into a. (a -= b)
+ *
+ * a  A single precision integer and result.
+ * b  A single precision integer.
+ *
+ * a is addressed through %rdi and b through %rsi; 64 words of 8 bytes
+ * (offsets 0..504) are processed in one unrolled borrow chain.
+ * %rax is cleared on entry and ends as 0 - borrow: 0 when the subtraction
+ * did not borrow out of the top word, all ones when it did.
+ * Words are handled in two interleaved register pairs (%rdx/%r8 and
+ * %rcx/%r9); only mov instructions sit between the sub/sbb steps, so the
+ * carry flag survives from one step to the next.
+ */
+#ifndef __APPLE__
+.globl sp_4096_sub_in_place_64
+.type sp_4096_sub_in_place_64,@function
+.align 16
+sp_4096_sub_in_place_64:
+#else
+.globl _sp_4096_sub_in_place_64
+.p2align 4
+_sp_4096_sub_in_place_64:
+#endif /* __APPLE__ */
+        xorq %rax, %rax
+        movq (%rdi), %rdx
+        movq 8(%rdi), %rcx
+        movq (%rsi), %r8
+        movq 8(%rsi), %r9
+        /* Word 0 starts the chain with sub; every later word uses sbb. */
+        subq %r8, %rdx
+        movq 16(%rsi), %r8
+        movq %rdx, (%rdi)
+        movq 16(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 24(%rsi), %r9
+        movq %rcx, 8(%rdi)
+        movq 24(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 32(%rsi), %r8
+        movq %rdx, 16(%rdi)
+        movq 32(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 40(%rsi), %r9
+        movq %rcx, 24(%rdi)
+        movq 40(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 48(%rsi), %r8
+        movq %rdx, 32(%rdi)
+        movq 48(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 56(%rsi), %r9
+        movq %rcx, 40(%rdi)
+        movq 56(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 64(%rsi), %r8
+        movq %rdx, 48(%rdi)
+        movq 64(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 72(%rsi), %r9
+        movq %rcx, 56(%rdi)
+        movq 72(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 80(%rsi), %r8
+        movq %rdx, 64(%rdi)
+        movq 80(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 88(%rsi), %r9
+        movq %rcx, 72(%rdi)
+        movq 88(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 96(%rsi), %r8
+        movq %rdx, 80(%rdi)
+        movq 96(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 104(%rsi), %r9
+        movq %rcx, 88(%rdi)
+        movq 104(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 112(%rsi), %r8
+        movq %rdx, 96(%rdi)
+        movq 112(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 120(%rsi), %r9
+        movq %rcx, 104(%rdi)
+        movq 120(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 128(%rsi), %r8
+        movq %rdx, 112(%rdi)
+        movq 128(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 136(%rsi), %r9
+        movq %rcx, 120(%rdi)
+        movq 136(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 144(%rsi), %r8
+        movq %rdx, 128(%rdi)
+        movq 144(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 152(%rsi), %r9
+        movq %rcx, 136(%rdi)
+        movq 152(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 160(%rsi), %r8
+        movq %rdx, 144(%rdi)
+        movq 160(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 168(%rsi), %r9
+        movq %rcx, 152(%rdi)
+        movq 168(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 176(%rsi), %r8
+        movq %rdx, 160(%rdi)
+        movq 176(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 184(%rsi), %r9
+        movq %rcx, 168(%rdi)
+        movq 184(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 192(%rsi), %r8
+        movq %rdx, 176(%rdi)
+        movq 192(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 200(%rsi), %r9
+        movq %rcx, 184(%rdi)
+        movq 200(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 208(%rsi), %r8
+        movq %rdx, 192(%rdi)
+        movq 208(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 216(%rsi), %r9
+        movq %rcx, 200(%rdi)
+        movq 216(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 224(%rsi), %r8
+        movq %rdx, 208(%rdi)
+        movq 224(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 232(%rsi), %r9
+        movq %rcx, 216(%rdi)
+        movq 232(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 240(%rsi), %r8
+        movq %rdx, 224(%rdi)
+        movq 240(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 248(%rsi), %r9
+        movq %rcx, 232(%rdi)
+        movq 248(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 256(%rsi), %r8
+        movq %rdx, 240(%rdi)
+        movq 256(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 264(%rsi), %r9
+        movq %rcx, 248(%rdi)
+        movq 264(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 272(%rsi), %r8
+        movq %rdx, 256(%rdi)
+        movq 272(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 280(%rsi), %r9
+        movq %rcx, 264(%rdi)
+        movq 280(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 288(%rsi), %r8
+        movq %rdx, 272(%rdi)
+        movq 288(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 296(%rsi), %r9
+        movq %rcx, 280(%rdi)
+        movq 296(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 304(%rsi), %r8
+        movq %rdx, 288(%rdi)
+        movq 304(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 312(%rsi), %r9
+        movq %rcx, 296(%rdi)
+        movq 312(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 320(%rsi), %r8
+        movq %rdx, 304(%rdi)
+        movq 320(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 328(%rsi), %r9
+        movq %rcx, 312(%rdi)
+        movq 328(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 336(%rsi), %r8
+        movq %rdx, 320(%rdi)
+        movq 336(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 344(%rsi), %r9
+        movq %rcx, 328(%rdi)
+        movq 344(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 352(%rsi), %r8
+        movq %rdx, 336(%rdi)
+        movq 352(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 360(%rsi), %r9
+        movq %rcx, 344(%rdi)
+        movq 360(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 368(%rsi), %r8
+        movq %rdx, 352(%rdi)
+        movq 368(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 376(%rsi), %r9
+        movq %rcx, 360(%rdi)
+        movq 376(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 384(%rsi), %r8
+        movq %rdx, 368(%rdi)
+        movq 384(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 392(%rsi), %r9
+        movq %rcx, 376(%rdi)
+        movq 392(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 400(%rsi), %r8
+        movq %rdx, 384(%rdi)
+        movq 400(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 408(%rsi), %r9
+        movq %rcx, 392(%rdi)
+        movq 408(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 416(%rsi), %r8
+        movq %rdx, 400(%rdi)
+        movq 416(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 424(%rsi), %r9
+        movq %rcx, 408(%rdi)
+        movq 424(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 432(%rsi), %r8
+        movq %rdx, 416(%rdi)
+        movq 432(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 440(%rsi), %r9
+        movq %rcx, 424(%rdi)
+        movq 440(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 448(%rsi), %r8
+        movq %rdx, 432(%rdi)
+        movq 448(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 456(%rsi), %r9
+        movq %rcx, 440(%rdi)
+        movq 456(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 464(%rsi), %r8
+        movq %rdx, 448(%rdi)
+        movq 464(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 472(%rsi), %r9
+        movq %rcx, 456(%rdi)
+        movq 472(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 480(%rsi), %r8
+        movq %rdx, 464(%rdi)
+        movq 480(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 488(%rsi), %r9
+        movq %rcx, 472(%rdi)
+        movq 488(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq 496(%rsi), %r8
+        movq %rdx, 480(%rdi)
+        movq 496(%rdi), %rdx
+        sbbq %r9, %rcx
+        movq 504(%rsi), %r9
+        movq %rcx, 488(%rdi)
+        movq 504(%rdi), %rcx
+        sbbq %r8, %rdx
+        movq %rdx, 496(%rdi)
+        sbbq %r9, %rcx
+        movq %rcx, 504(%rdi)
+        /* %rax = 0 - final borrow. */
+        sbbq $0, %rax
+        repz retq
+#ifndef __APPLE__
+.size sp_4096_sub_in_place_64,.-sp_4096_sub_in_place_64
+#endif /* __APPLE__ */
+/* Add b to a into r. (r = a + b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision integer.
+ *
+ * r is addressed through %rdi, a through %rsi and b through %rdx; 64 words
+ * of 8 bytes (offsets 0..504) are added in one unrolled carry chain.
+ * %rax is cleared on entry and ends holding the carry out of the top word
+ * (0 or 1). Only mov instructions sit between the add/adc steps, so the
+ * carry flag survives from one word to the next.
+ */
+#ifndef __APPLE__
+.globl sp_4096_add_64
+.type sp_4096_add_64,@function
+.align 16
+sp_4096_add_64:
+#else
+.globl _sp_4096_add_64
+.p2align 4
+_sp_4096_add_64:
+#endif /* __APPLE__ */
+        xorq %rax, %rax
+        movq (%rsi), %rcx
+        /* Word 0 starts the chain with add; every later word uses adc. */
+        addq (%rdx), %rcx
+        movq %rcx, (%rdi)
+        movq 8(%rsi), %rcx
+        adcq 8(%rdx), %rcx
+        movq %rcx, 8(%rdi)
+        movq 16(%rsi), %rcx
+        adcq 16(%rdx), %rcx
+        movq %rcx, 16(%rdi)
+        movq 24(%rsi), %rcx
+        adcq 24(%rdx), %rcx
+        movq %rcx, 24(%rdi)
+        movq 32(%rsi), %rcx
+        adcq 32(%rdx), %rcx
+        movq %rcx, 32(%rdi)
+        movq 40(%rsi), %rcx
+        adcq 40(%rdx), %rcx
+        movq %rcx, 40(%rdi)
+        movq 48(%rsi), %rcx
+        adcq 48(%rdx), %rcx
+        movq %rcx, 48(%rdi)
+        movq 56(%rsi), %rcx
+        adcq 56(%rdx), %rcx
+        movq %rcx, 56(%rdi)
+        movq 64(%rsi), %rcx
+        adcq 64(%rdx), %rcx
+        movq %rcx, 64(%rdi)
+        movq 72(%rsi), %rcx
+        adcq 72(%rdx), %rcx
+        movq %rcx, 72(%rdi)
+        movq 80(%rsi), %rcx
+        adcq 80(%rdx), %rcx
+        movq %rcx, 80(%rdi)
+        movq 88(%rsi), %rcx
+        adcq 88(%rdx), %rcx
+        movq %rcx, 88(%rdi)
+        movq 96(%rsi), %rcx
+        adcq 96(%rdx), %rcx
+        movq %rcx, 96(%rdi)
+        movq 104(%rsi), %rcx
+        adcq 104(%rdx), %rcx
+        movq %rcx, 104(%rdi)
+        movq 112(%rsi), %rcx
+        adcq 112(%rdx), %rcx
+        movq %rcx, 112(%rdi)
+        movq 120(%rsi), %rcx
+        adcq 120(%rdx), %rcx
+        movq %rcx, 120(%rdi)
+        movq 128(%rsi), %rcx
+        adcq 128(%rdx), %rcx
+        movq %rcx, 128(%rdi)
+        movq 136(%rsi), %rcx
+        adcq 136(%rdx), %rcx
+        movq %rcx, 136(%rdi)
+        movq 144(%rsi), %rcx
+        adcq 144(%rdx), %rcx
+        movq %rcx, 144(%rdi)
+        movq 152(%rsi), %rcx
+        adcq 152(%rdx), %rcx
+        movq %rcx, 152(%rdi)
+        movq 160(%rsi), %rcx
+        adcq 160(%rdx), %rcx
+        movq %rcx, 160(%rdi)
+        movq 168(%rsi), %rcx
+        adcq 168(%rdx), %rcx
+        movq %rcx, 168(%rdi)
+        movq 176(%rsi), %rcx
+        adcq 176(%rdx), %rcx
+        movq %rcx, 176(%rdi)
+        movq 184(%rsi), %rcx
+        adcq 184(%rdx), %rcx
+        movq %rcx, 184(%rdi)
+        movq 192(%rsi), %rcx
+        adcq 192(%rdx), %rcx
+        movq %rcx, 192(%rdi)
+        movq 200(%rsi), %rcx
+        adcq 200(%rdx), %rcx
+        movq %rcx, 200(%rdi)
+        movq 208(%rsi), %rcx
+        adcq 208(%rdx), %rcx
+        movq %rcx, 208(%rdi)
+        movq 216(%rsi), %rcx
+        adcq 216(%rdx), %rcx
+        movq %rcx, 216(%rdi)
+        movq 224(%rsi), %rcx
+        adcq 224(%rdx), %rcx
+        movq %rcx, 224(%rdi)
+        movq 232(%rsi), %rcx
+        adcq 232(%rdx), %rcx
+        movq %rcx, 232(%rdi)
+        movq 240(%rsi), %rcx
+        adcq 240(%rdx), %rcx
+        movq %rcx, 240(%rdi)
+        movq 248(%rsi), %rcx
+        adcq 248(%rdx), %rcx
+        movq %rcx, 248(%rdi)
+        movq 256(%rsi), %rcx
+        adcq 256(%rdx), %rcx
+        movq %rcx, 256(%rdi)
+        movq 264(%rsi), %rcx
+        adcq 264(%rdx), %rcx
+        movq %rcx, 264(%rdi)
+        movq 272(%rsi), %rcx
+        adcq 272(%rdx), %rcx
+        movq %rcx, 272(%rdi)
+        movq 280(%rsi), %rcx
+        adcq 280(%rdx), %rcx
+        movq %rcx, 280(%rdi)
+        movq 288(%rsi), %rcx
+        adcq 288(%rdx), %rcx
+        movq %rcx, 288(%rdi)
+        movq 296(%rsi), %rcx
+        adcq 296(%rdx), %rcx
+        movq %rcx, 296(%rdi)
+        movq 304(%rsi), %rcx
+        adcq 304(%rdx), %rcx
+        movq %rcx, 304(%rdi)
+        movq 312(%rsi), %rcx
+        adcq 312(%rdx), %rcx
+        movq %rcx, 312(%rdi)
+        movq 320(%rsi), %rcx
+        adcq 320(%rdx), %rcx
+        movq %rcx, 320(%rdi)
+        movq 328(%rsi), %rcx
+        adcq 328(%rdx), %rcx
+        movq %rcx, 328(%rdi)
+        movq 336(%rsi), %rcx
+        adcq 336(%rdx), %rcx
+        movq %rcx, 336(%rdi)
+        movq 344(%rsi), %rcx
+        adcq 344(%rdx), %rcx
+        movq %rcx, 344(%rdi)
+        movq 352(%rsi), %rcx
+        adcq 352(%rdx), %rcx
+        movq %rcx, 352(%rdi)
+        movq 360(%rsi), %rcx
+        adcq 360(%rdx), %rcx
+        movq %rcx, 360(%rdi)
+        movq 368(%rsi), %rcx
+        adcq 368(%rdx), %rcx
+        movq %rcx, 368(%rdi)
+        movq 376(%rsi), %rcx
+        adcq 376(%rdx), %rcx
+        movq %rcx, 376(%rdi)
+        movq 384(%rsi), %rcx
+        adcq 384(%rdx), %rcx
+        movq %rcx, 384(%rdi)
+        movq 392(%rsi), %rcx
+        adcq 392(%rdx), %rcx
+        movq %rcx, 392(%rdi)
+        movq 400(%rsi), %rcx
+        adcq 400(%rdx), %rcx
+        movq %rcx, 400(%rdi)
+        movq 408(%rsi), %rcx
+        adcq 408(%rdx), %rcx
+        movq %rcx, 408(%rdi)
+        movq 416(%rsi), %rcx
+        adcq 416(%rdx), %rcx
+        movq %rcx, 416(%rdi)
+        movq 424(%rsi), %rcx
+        adcq 424(%rdx), %rcx
+        movq %rcx, 424(%rdi)
+        movq 432(%rsi), %rcx
+        adcq 432(%rdx), %rcx
+        movq %rcx, 432(%rdi)
+        movq 440(%rsi), %rcx
+        adcq 440(%rdx), %rcx
+        movq %rcx, 440(%rdi)
+        movq 448(%rsi), %rcx
+        adcq 448(%rdx), %rcx
+        movq %rcx, 448(%rdi)
+        movq 456(%rsi), %rcx
+        adcq 456(%rdx), %rcx
+        movq %rcx, 456(%rdi)
+        movq 464(%rsi), %rcx
+        adcq 464(%rdx), %rcx
+        movq %rcx, 464(%rdi)
+        movq 472(%rsi), %rcx
+        adcq 472(%rdx), %rcx
+        movq %rcx, 472(%rdi)
+        movq 480(%rsi), %rcx
+        adcq 480(%rdx), %rcx
+        movq %rcx, 480(%rdi)
+        movq 488(%rsi), %rcx
+        adcq 488(%rdx), %rcx
+        movq %rcx, 488(%rdi)
+        movq 496(%rsi), %rcx
+        adcq 496(%rdx), %rcx
+        movq %rcx, 496(%rdi)
+        movq 504(%rsi), %rcx
+        adcq 504(%rdx), %rcx
+        movq %rcx, 504(%rdi)
+        /* %rax = carry out of the top word. */
+        adcq $0, %rax
+        repz retq
+#ifndef __APPLE__
+.size sp_4096_add_64,.-sp_4096_add_64
+#endif /* __APPLE__ */
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mul_d_64
+.type sp_4096_mul_d_64,@function
+.align 16
+sp_4096_mul_d_64:
+#else
+.globl _sp_4096_mul_d_64
+.p2align 4
+_sp_4096_mul_d_64:
+#endif /* __APPLE__ */
+ movq %rdx, %rcx # keep b in %rcx; mulq overwrites %rdx:%rax
+ # A[0] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ movq %rax, %r8
+ movq %rdx, %r9
+ movq %r8, (%rdi)
+ # A[1] * B (%r9 is stored, %r10 takes the high half, %r8 catches its carry; the three roles rotate per digit)
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 8(%rsi)
+ addq %rax, %r9
+ movq %r9, 8(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[2] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 16(%rsi)
+ addq %rax, %r10
+ movq %r10, 16(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[3] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 24(%rsi)
+ addq %rax, %r8
+ movq %r8, 24(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[4] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 32(%rsi)
+ addq %rax, %r9
+ movq %r9, 32(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[5] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ addq %rax, %r10
+ movq %r10, 40(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[6] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ addq %rax, %r8
+ movq %r8, 48(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[7] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 56(%rsi)
+ addq %rax, %r9
+ movq %r9, 56(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[8] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 64(%rsi)
+ addq %rax, %r10
+ movq %r10, 64(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[9] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 72(%rsi)
+ addq %rax, %r8
+ movq %r8, 72(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[10] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 80(%rsi)
+ addq %rax, %r9
+ movq %r9, 80(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[11] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ addq %rax, %r10
+ movq %r10, 88(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[12] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ addq %rax, %r8
+ movq %r8, 96(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[13] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 104(%rsi)
+ addq %rax, %r9
+ movq %r9, 104(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[14] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 112(%rsi)
+ addq %rax, %r10
+ movq %r10, 112(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[15] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 120(%rsi)
+ addq %rax, %r8
+ movq %r8, 120(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[16] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 128(%rsi)
+ addq %rax, %r9
+ movq %r9, 128(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[17] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ addq %rax, %r10
+ movq %r10, 136(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[18] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ addq %rax, %r8
+ movq %r8, 144(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[19] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 152(%rsi)
+ addq %rax, %r9
+ movq %r9, 152(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[20] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 160(%rsi)
+ addq %rax, %r10
+ movq %r10, 160(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[21] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 168(%rsi)
+ addq %rax, %r8
+ movq %r8, 168(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[22] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 176(%rsi)
+ addq %rax, %r9
+ movq %r9, 176(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[23] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ addq %rax, %r10
+ movq %r10, 184(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[24] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ addq %rax, %r8
+ movq %r8, 192(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[25] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 200(%rsi)
+ addq %rax, %r9
+ movq %r9, 200(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[26] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 208(%rsi)
+ addq %rax, %r10
+ movq %r10, 208(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[27] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 216(%rsi)
+ addq %rax, %r8
+ movq %r8, 216(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[28] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 224(%rsi)
+ addq %rax, %r9
+ movq %r9, 224(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[29] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ addq %rax, %r10
+ movq %r10, 232(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[30] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ addq %rax, %r8
+ movq %r8, 240(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[31] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 248(%rsi)
+ addq %rax, %r9
+ movq %r9, 248(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[32] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 256(%rsi)
+ addq %rax, %r10
+ movq %r10, 256(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[33] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 264(%rsi)
+ addq %rax, %r8
+ movq %r8, 264(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[34] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 272(%rsi)
+ addq %rax, %r9
+ movq %r9, 272(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[35] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ addq %rax, %r10
+ movq %r10, 280(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[36] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ addq %rax, %r8
+ movq %r8, 288(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[37] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 296(%rsi)
+ addq %rax, %r9
+ movq %r9, 296(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[38] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 304(%rsi)
+ addq %rax, %r10
+ movq %r10, 304(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[39] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 312(%rsi)
+ addq %rax, %r8
+ movq %r8, 312(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[40] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 320(%rsi)
+ addq %rax, %r9
+ movq %r9, 320(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[41] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ addq %rax, %r10
+ movq %r10, 328(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[42] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ addq %rax, %r8
+ movq %r8, 336(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[43] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 344(%rsi)
+ addq %rax, %r9
+ movq %r9, 344(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[44] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 352(%rsi)
+ addq %rax, %r10
+ movq %r10, 352(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[45] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 360(%rsi)
+ addq %rax, %r8
+ movq %r8, 360(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[46] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 368(%rsi)
+ addq %rax, %r9
+ movq %r9, 368(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[47] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 376(%rsi)
+ addq %rax, %r10
+ movq %r10, 376(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[48] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 384(%rsi)
+ addq %rax, %r8
+ movq %r8, 384(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[49] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 392(%rsi)
+ addq %rax, %r9
+ movq %r9, 392(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[50] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 400(%rsi)
+ addq %rax, %r10
+ movq %r10, 400(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[51] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 408(%rsi)
+ addq %rax, %r8
+ movq %r8, 408(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[52] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 416(%rsi)
+ addq %rax, %r9
+ movq %r9, 416(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[53] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 424(%rsi)
+ addq %rax, %r10
+ movq %r10, 424(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[54] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 432(%rsi)
+ addq %rax, %r8
+ movq %r8, 432(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[55] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 440(%rsi)
+ addq %rax, %r9
+ movq %r9, 440(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[56] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 448(%rsi)
+ addq %rax, %r10
+ movq %r10, 448(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[57] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 456(%rsi)
+ addq %rax, %r8
+ movq %r8, 456(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[58] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 464(%rsi)
+ addq %rax, %r9
+ movq %r9, 464(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[59] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 472(%rsi)
+ addq %rax, %r10
+ movq %r10, 472(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[60] * B
+ movq %rcx, %rax
+ xorq %r10, %r10
+ mulq 480(%rsi)
+ addq %rax, %r8
+ movq %r8, 480(%rdi)
+ adcq %rdx, %r9
+ adcq $0, %r10
+ # A[61] * B
+ movq %rcx, %rax
+ xorq %r8, %r8
+ mulq 488(%rsi)
+ addq %rax, %r9
+ movq %r9, 488(%rdi)
+ adcq %rdx, %r10
+ adcq $0, %r8
+ # A[62] * B
+ movq %rcx, %rax
+ xorq %r9, %r9
+ mulq 496(%rsi)
+ addq %rax, %r10
+ movq %r10, 496(%rdi)
+ adcq %rdx, %r8
+ adcq $0, %r9
+ # A[63] * B (last digit: the remaining high word is stored as r[64], so r receives 65 words)
+ movq %rcx, %rax
+ mulq 504(%rsi)
+ addq %rax, %r8
+ adcq %rdx, %r9
+ movq %r8, 504(%rdi)
+ movq %r9, 512(%rdi)
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mul_d_64,.-sp_4096_mul_d_64
+#endif /* __APPLE__ */
+/* Conditionally subtract b from a using the mask m.
+ * m is -1 to subtract b and 0 to subtract nothing (r = a).
+ *
+ * r A single precision number representing the conditional subtraction result.
+ * a A single precision number to subtract from.
+ * b A single precision number to subtract.
+ * m Mask value to apply.
+ */
+#ifndef __APPLE__
+.globl sp_4096_cond_sub_64
+.type sp_4096_cond_sub_64,@function
+.align 16
+sp_4096_cond_sub_64:
+#else
+.globl _sp_4096_cond_sub_64
+.p2align 4
+_sp_4096_cond_sub_64:
+#endif /* __APPLE__ */
+ subq $512, %rsp # scratch buffer for the 64 masked words of b
+ movq $0, %rax # return value starts at 0
+ movq (%rdx), %r8 # phase 1: copy b & m to the stack, two words per step
+ movq 8(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, (%rsp)
+ movq %r9, 8(%rsp)
+ movq 16(%rdx), %r8
+ movq 24(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 16(%rsp)
+ movq %r9, 24(%rsp)
+ movq 32(%rdx), %r8
+ movq 40(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ movq 48(%rdx), %r8
+ movq 56(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 48(%rsp)
+ movq %r9, 56(%rsp)
+ movq 64(%rdx), %r8
+ movq 72(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 64(%rsp)
+ movq %r9, 72(%rsp)
+ movq 80(%rdx), %r8
+ movq 88(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 80(%rsp)
+ movq %r9, 88(%rsp)
+ movq 96(%rdx), %r8
+ movq 104(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 96(%rsp)
+ movq %r9, 104(%rsp)
+ movq 112(%rdx), %r8
+ movq 120(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 112(%rsp)
+ movq %r9, 120(%rsp)
+ movq 128(%rdx), %r8
+ movq 136(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 128(%rsp)
+ movq %r9, 136(%rsp)
+ movq 144(%rdx), %r8
+ movq 152(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 144(%rsp)
+ movq %r9, 152(%rsp)
+ movq 160(%rdx), %r8
+ movq 168(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 160(%rsp)
+ movq %r9, 168(%rsp)
+ movq 176(%rdx), %r8
+ movq 184(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 176(%rsp)
+ movq %r9, 184(%rsp)
+ movq 192(%rdx), %r8
+ movq 200(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 192(%rsp)
+ movq %r9, 200(%rsp)
+ movq 208(%rdx), %r8
+ movq 216(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 208(%rsp)
+ movq %r9, 216(%rsp)
+ movq 224(%rdx), %r8
+ movq 232(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 224(%rsp)
+ movq %r9, 232(%rsp)
+ movq 240(%rdx), %r8
+ movq 248(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 240(%rsp)
+ movq %r9, 248(%rsp)
+ movq 256(%rdx), %r8
+ movq 264(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 256(%rsp)
+ movq %r9, 264(%rsp)
+ movq 272(%rdx), %r8
+ movq 280(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 272(%rsp)
+ movq %r9, 280(%rsp)
+ movq 288(%rdx), %r8
+ movq 296(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 288(%rsp)
+ movq %r9, 296(%rsp)
+ movq 304(%rdx), %r8
+ movq 312(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 304(%rsp)
+ movq %r9, 312(%rsp)
+ movq 320(%rdx), %r8
+ movq 328(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 320(%rsp)
+ movq %r9, 328(%rsp)
+ movq 336(%rdx), %r8
+ movq 344(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 336(%rsp)
+ movq %r9, 344(%rsp)
+ movq 352(%rdx), %r8
+ movq 360(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 352(%rsp)
+ movq %r9, 360(%rsp)
+ movq 368(%rdx), %r8
+ movq 376(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 368(%rsp)
+ movq %r9, 376(%rsp)
+ movq 384(%rdx), %r8
+ movq 392(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 384(%rsp)
+ movq %r9, 392(%rsp)
+ movq 400(%rdx), %r8
+ movq 408(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 400(%rsp)
+ movq %r9, 408(%rsp)
+ movq 416(%rdx), %r8
+ movq 424(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 416(%rsp)
+ movq %r9, 424(%rsp)
+ movq 432(%rdx), %r8
+ movq 440(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 432(%rsp)
+ movq %r9, 440(%rsp)
+ movq 448(%rdx), %r8
+ movq 456(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 448(%rsp)
+ movq %r9, 456(%rsp)
+ movq 464(%rdx), %r8
+ movq 472(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 464(%rsp)
+ movq %r9, 472(%rsp)
+ movq 480(%rdx), %r8
+ movq 488(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 480(%rsp)
+ movq %r9, 488(%rsp)
+ movq 496(%rdx), %r8
+ movq 504(%rdx), %r9
+ andq %rcx, %r8
+ andq %rcx, %r9
+ movq %r8, 496(%rsp)
+ movq %r9, 504(%rsp)
+ movq (%rsi), %r8 # phase 2: r = a - (b & m), borrow carried through the sbbq chain
+ movq (%rsp), %rdx
+ subq %rdx, %r8
+ movq 8(%rsi), %r9
+ movq 8(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, (%rdi) # each result word is stored one step late; movq leaves the borrow flag intact
+ movq 16(%rsi), %r8
+ movq 16(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 8(%rdi)
+ movq 24(%rsi), %r9
+ movq 24(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 16(%rdi)
+ movq 32(%rsi), %r8
+ movq 32(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 24(%rdi)
+ movq 40(%rsi), %r9
+ movq 40(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 32(%rdi)
+ movq 48(%rsi), %r8
+ movq 48(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 40(%rdi)
+ movq 56(%rsi), %r9
+ movq 56(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 48(%rdi)
+ movq 64(%rsi), %r8
+ movq 64(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 56(%rdi)
+ movq 72(%rsi), %r9
+ movq 72(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 64(%rdi)
+ movq 80(%rsi), %r8
+ movq 80(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 72(%rdi)
+ movq 88(%rsi), %r9
+ movq 88(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 80(%rdi)
+ movq 96(%rsi), %r8
+ movq 96(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 88(%rdi)
+ movq 104(%rsi), %r9
+ movq 104(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 96(%rdi)
+ movq 112(%rsi), %r8
+ movq 112(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 104(%rdi)
+ movq 120(%rsi), %r9
+ movq 120(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 112(%rdi)
+ movq 128(%rsi), %r8
+ movq 128(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 120(%rdi)
+ movq 136(%rsi), %r9
+ movq 136(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 128(%rdi)
+ movq 144(%rsi), %r8
+ movq 144(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 136(%rdi)
+ movq 152(%rsi), %r9
+ movq 152(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 144(%rdi)
+ movq 160(%rsi), %r8
+ movq 160(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 152(%rdi)
+ movq 168(%rsi), %r9
+ movq 168(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 160(%rdi)
+ movq 176(%rsi), %r8
+ movq 176(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 168(%rdi)
+ movq 184(%rsi), %r9
+ movq 184(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 176(%rdi)
+ movq 192(%rsi), %r8
+ movq 192(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 184(%rdi)
+ movq 200(%rsi), %r9
+ movq 200(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 192(%rdi)
+ movq 208(%rsi), %r8
+ movq 208(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 200(%rdi)
+ movq 216(%rsi), %r9
+ movq 216(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 208(%rdi)
+ movq 224(%rsi), %r8
+ movq 224(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 216(%rdi)
+ movq 232(%rsi), %r9
+ movq 232(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 224(%rdi)
+ movq 240(%rsi), %r8
+ movq 240(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 232(%rdi)
+ movq 248(%rsi), %r9
+ movq 248(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 240(%rdi)
+ movq 256(%rsi), %r8
+ movq 256(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 248(%rdi)
+ movq 264(%rsi), %r9
+ movq 264(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 256(%rdi)
+ movq 272(%rsi), %r8
+ movq 272(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 264(%rdi)
+ movq 280(%rsi), %r9
+ movq 280(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 272(%rdi)
+ movq 288(%rsi), %r8
+ movq 288(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 280(%rdi)
+ movq 296(%rsi), %r9
+ movq 296(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 288(%rdi)
+ movq 304(%rsi), %r8
+ movq 304(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 296(%rdi)
+ movq 312(%rsi), %r9
+ movq 312(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 304(%rdi)
+ movq 320(%rsi), %r8
+ movq 320(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 312(%rdi)
+ movq 328(%rsi), %r9
+ movq 328(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 320(%rdi)
+ movq 336(%rsi), %r8
+ movq 336(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 328(%rdi)
+ movq 344(%rsi), %r9
+ movq 344(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 336(%rdi)
+ movq 352(%rsi), %r8
+ movq 352(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 344(%rdi)
+ movq 360(%rsi), %r9
+ movq 360(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 352(%rdi)
+ movq 368(%rsi), %r8
+ movq 368(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 360(%rdi)
+ movq 376(%rsi), %r9
+ movq 376(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 368(%rdi)
+ movq 384(%rsi), %r8
+ movq 384(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 376(%rdi)
+ movq 392(%rsi), %r9
+ movq 392(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 384(%rdi)
+ movq 400(%rsi), %r8
+ movq 400(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 392(%rdi)
+ movq 408(%rsi), %r9
+ movq 408(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 400(%rdi)
+ movq 416(%rsi), %r8
+ movq 416(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 408(%rdi)
+ movq 424(%rsi), %r9
+ movq 424(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 416(%rdi)
+ movq 432(%rsi), %r8
+ movq 432(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 424(%rdi)
+ movq 440(%rsi), %r9
+ movq 440(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 432(%rdi)
+ movq 448(%rsi), %r8
+ movq 448(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 440(%rdi)
+ movq 456(%rsi), %r9
+ movq 456(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 448(%rdi)
+ movq 464(%rsi), %r8
+ movq 464(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 456(%rdi)
+ movq 472(%rsi), %r9
+ movq 472(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 464(%rdi)
+ movq 480(%rsi), %r8
+ movq 480(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 472(%rdi)
+ movq 488(%rsi), %r9
+ movq 488(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 480(%rdi)
+ movq 496(%rsi), %r8
+ movq 496(%rsp), %rdx
+ sbbq %rdx, %r8
+ movq %r9, 488(%rdi)
+ movq 504(%rsi), %r9
+ movq 504(%rsp), %rdx
+ sbbq %rdx, %r9
+ movq %r8, 496(%rdi)
+ movq %r9, 504(%rdi)
+ sbbq $0, %rax # %rax = 0 - borrow: 0, or -1 when the subtraction borrowed
+ addq $512, %rsp # release the scratch buffer
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_cond_sub_64,.-sp_4096_cond_sub_64
+#endif /* __APPLE__ */
+/* Reduce the number back to 4096 bits using Montgomery reduction.
+ *
+ * a A single precision number to reduce in place.
+ * m The single precision number representing the modulus.
+ * mp The digit representing the negative inverse of m mod 2^n.
+ */
+#ifndef __APPLE__
+.globl sp_4096_mont_reduce_64
+.type sp_4096_mont_reduce_64,@function
+.align 16
+sp_4096_mont_reduce_64:
+#else
+.globl _sp_4096_mont_reduce_64
+.p2align 4
+_sp_4096_mont_reduce_64:
+#endif /* __APPLE__ */
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ movq %rdx, %rcx # mp moves to %rcx; mulq overwrites %rdx
+ xorq %r15, %r15 # %r15 holds the carry out of the top word, carried into the next iteration
+ # i = 0 (%r8 counts the 64 outer iterations down to zero)
+ movq $64, %r8
+ movq (%rdi), %r13 # a[i] and a[i+1] are cached in %r13 and %r14 across iterations
+ movq 8(%rdi), %r14
+L_mont_loop_64:
+ # mu = a[i] * mp
+ movq %r13, %r11
+ imulq %rcx, %r11
+ # a[i+0] += m[0] * mu (the low word is not stored; only the carry in %r10 is kept)
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq (%rsi)
+ addq %rax, %r13
+ adcq %rdx, %r10
+ # a[i+1] += m[1] * mu (result stays in %r13 and becomes a[i] of the next iteration)
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 8(%rsi)
+ movq %r14, %r13
+ addq %rax, %r13
+ adcq %rdx, %r9
+ addq %r10, %r13
+ adcq $0, %r9
+ # a[i+2] += m[2] * mu (result stays in %r14 and becomes a[i+1] of the next iteration)
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 16(%rsi)
+ movq 16(%rdi), %r14
+ addq %rax, %r14
+ adcq %rdx, %r10
+ addq %r9, %r14
+ adcq $0, %r10
+ # a[i+3] += m[3] * mu (from here on each word is updated in memory; %r9 and %r10 alternate as carry)
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 24(%rsi)
+ movq 24(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 24(%rdi)
+ adcq $0, %r9
+ # a[i+4] += m[4] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 32(%rsi)
+ movq 32(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 32(%rdi)
+ adcq $0, %r10
+ # a[i+5] += m[5] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 40(%rsi)
+ movq 40(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 40(%rdi)
+ adcq $0, %r9
+ # a[i+6] += m[6] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 48(%rsi)
+ movq 48(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 48(%rdi)
+ adcq $0, %r10
+ # a[i+7] += m[7] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 56(%rsi)
+ movq 56(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 56(%rdi)
+ adcq $0, %r9
+ # a[i+8] += m[8] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 64(%rsi)
+ movq 64(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 64(%rdi)
+ adcq $0, %r10
+ # a[i+9] += m[9] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 72(%rsi)
+ movq 72(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 72(%rdi)
+ adcq $0, %r9
+ # a[i+10] += m[10] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 80(%rsi)
+ movq 80(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 80(%rdi)
+ adcq $0, %r10
+ # a[i+11] += m[11] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 88(%rsi)
+ movq 88(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 88(%rdi)
+ adcq $0, %r9
+ # a[i+12] += m[12] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 96(%rsi)
+ movq 96(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 96(%rdi)
+ adcq $0, %r10
+ # a[i+13] += m[13] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 104(%rsi)
+ movq 104(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 104(%rdi)
+ adcq $0, %r9
+ # a[i+14] += m[14] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 112(%rsi)
+ movq 112(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 112(%rdi)
+ adcq $0, %r10
+ # a[i+15] += m[15] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 120(%rsi)
+ movq 120(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 120(%rdi)
+ adcq $0, %r9
+ # a[i+16] += m[16] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 128(%rsi)
+ movq 128(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 128(%rdi)
+ adcq $0, %r10
+ # a[i+17] += m[17] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 136(%rsi)
+ movq 136(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 136(%rdi)
+ adcq $0, %r9
+ # a[i+18] += m[18] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 144(%rsi)
+ movq 144(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 144(%rdi)
+ adcq $0, %r10
+ # a[i+19] += m[19] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 152(%rsi)
+ movq 152(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 152(%rdi)
+ adcq $0, %r9
+ # a[i+20] += m[20] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 160(%rsi)
+ movq 160(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 160(%rdi)
+ adcq $0, %r10
+ # a[i+21] += m[21] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 168(%rsi)
+ movq 168(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 168(%rdi)
+ adcq $0, %r9
+ # a[i+22] += m[22] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 176(%rsi)
+ movq 176(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 176(%rdi)
+ adcq $0, %r10
+ # a[i+23] += m[23] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 184(%rsi)
+ movq 184(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 184(%rdi)
+ adcq $0, %r9
+ # a[i+24] += m[24] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 192(%rsi)
+ movq 192(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 192(%rdi)
+ adcq $0, %r10
+ # a[i+25] += m[25] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 200(%rsi)
+ movq 200(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 200(%rdi)
+ adcq $0, %r9
+ # a[i+26] += m[26] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 208(%rsi)
+ movq 208(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 208(%rdi)
+ adcq $0, %r10
+ # a[i+27] += m[27] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 216(%rsi)
+ movq 216(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 216(%rdi)
+ adcq $0, %r9
+ # a[i+28] += m[28] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 224(%rsi)
+ movq 224(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 224(%rdi)
+ adcq $0, %r10
+ # a[i+29] += m[29] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 232(%rsi)
+ movq 232(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 232(%rdi)
+ adcq $0, %r9
+ # a[i+30] += m[30] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 240(%rsi)
+ movq 240(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 240(%rdi)
+ adcq $0, %r10
+ # a[i+31] += m[31] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 248(%rsi)
+ movq 248(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 248(%rdi)
+ adcq $0, %r9
+ # a[i+32] += m[32] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 256(%rsi)
+ movq 256(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 256(%rdi)
+ adcq $0, %r10
+ # a[i+33] += m[33] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 264(%rsi)
+ movq 264(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 264(%rdi)
+ adcq $0, %r9
+ # a[i+34] += m[34] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 272(%rsi)
+ movq 272(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 272(%rdi)
+ adcq $0, %r10
+ # a[i+35] += m[35] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 280(%rsi)
+ movq 280(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 280(%rdi)
+ adcq $0, %r9
+ # a[i+36] += m[36] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 288(%rsi)
+ movq 288(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 288(%rdi)
+ adcq $0, %r10
+ # a[i+37] += m[37] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 296(%rsi)
+ movq 296(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 296(%rdi)
+ adcq $0, %r9
+ # a[i+38] += m[38] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 304(%rsi)
+ movq 304(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 304(%rdi)
+ adcq $0, %r10
+ # a[i+39] += m[39] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 312(%rsi)
+ movq 312(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 312(%rdi)
+ adcq $0, %r9
+ # a[i+40] += m[40] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 320(%rsi)
+ movq 320(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 320(%rdi)
+ adcq $0, %r10
+ # a[i+41] += m[41] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 328(%rsi)
+ movq 328(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 328(%rdi)
+ adcq $0, %r9
+ # a[i+42] += m[42] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 336(%rsi)
+ movq 336(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 336(%rdi)
+ adcq $0, %r10
+ # a[i+43] += m[43] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 344(%rsi)
+ movq 344(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 344(%rdi)
+ adcq $0, %r9
+ # a[i+44] += m[44] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 352(%rsi)
+ movq 352(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 352(%rdi)
+ adcq $0, %r10
+ # a[i+45] += m[45] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 360(%rsi)
+ movq 360(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 360(%rdi)
+ adcq $0, %r9
+ # a[i+46] += m[46] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 368(%rsi)
+ movq 368(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 368(%rdi)
+ adcq $0, %r10
+ # a[i+47] += m[47] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 376(%rsi)
+ movq 376(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 376(%rdi)
+ adcq $0, %r9
+ # a[i+48] += m[48] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 384(%rsi)
+ movq 384(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 384(%rdi)
+ adcq $0, %r10
+ # a[i+49] += m[49] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 392(%rsi)
+ movq 392(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 392(%rdi)
+ adcq $0, %r9
+ # a[i+50] += m[50] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 400(%rsi)
+ movq 400(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 400(%rdi)
+ adcq $0, %r10
+ # a[i+51] += m[51] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 408(%rsi)
+ movq 408(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 408(%rdi)
+ adcq $0, %r9
+ # a[i+52] += m[52] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 416(%rsi)
+ movq 416(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 416(%rdi)
+ adcq $0, %r10
+ # a[i+53] += m[53] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 424(%rsi)
+ movq 424(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 424(%rdi)
+ adcq $0, %r9
+ # a[i+54] += m[54] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 432(%rsi)
+ movq 432(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 432(%rdi)
+ adcq $0, %r10
+ # a[i+55] += m[55] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 440(%rsi)
+ movq 440(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 440(%rdi)
+ adcq $0, %r9
+ # a[i+56] += m[56] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 448(%rsi)
+ movq 448(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 448(%rdi)
+ adcq $0, %r10
+ # a[i+57] += m[57] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 456(%rsi)
+ movq 456(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 456(%rdi)
+ adcq $0, %r9
+ # a[i+58] += m[58] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 464(%rsi)
+ movq 464(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 464(%rdi)
+ adcq $0, %r10
+ # a[i+59] += m[59] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 472(%rsi)
+ movq 472(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 472(%rdi)
+ adcq $0, %r9
+ # a[i+60] += m[60] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 480(%rsi)
+ movq 480(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 480(%rdi)
+ adcq $0, %r10
+ # a[i+61] += m[61] * mu
+ movq %r11, %rax
+ xorq %r9, %r9
+ mulq 488(%rsi)
+ movq 488(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r9
+ addq %r10, %r12
+ movq %r12, 488(%rdi)
+ adcq $0, %r9
+ # a[i+62] += m[62] * mu
+ movq %r11, %rax
+ xorq %r10, %r10
+ mulq 496(%rsi)
+ movq 496(%rdi), %r12
+ addq %rax, %r12
+ adcq %rdx, %r10
+ addq %r9, %r12
+ movq %r12, 496(%rdi)
+ adcq $0, %r10
+ # a[i+63] += m[63] * mu (also folds the previous carry in %r15 into a[i+64] and records the new one)
+ movq %r11, %rax
+ mulq 504(%rsi)
+ movq 504(%rdi), %r12
+ addq %rax, %r10
+ adcq %r15, %rdx
+ movq $0, %r15 # movq leaves the carry flag intact for the adcq below
+ adcq $0, %r15
+ addq %r10, %r12
+ movq %r12, 504(%rdi)
+ adcq %rdx, 512(%rdi)
+ adcq $0, %r15
+ # i += 1 (slide the window up one 8-byte word and count the iteration down)
+ addq $8, %rdi
+ decq %r8
+ jnz L_mont_loop_64
+ movq %r13, (%rdi) # flush the two cached words; %rdi is now 512 bytes past the original a
+ movq %r14, 8(%rdi)
+ negq %r15 # negate the final carry to form the mask argument (0 -> 0, 1 -> -1)
+ movq %r15, %rcx # 4th argument: mask m
+ movq %rsi, %rdx # 3rd argument: b = the modulus
+ movq %rdi, %rsi # 2nd argument: a = upper half of the buffer
+ movq %rdi, %rdi # no-op as written
+ subq $512, %rdi # 1st argument: r = start of the original buffer
+#ifndef __APPLE__
+ callq sp_4096_cond_sub_64@plt # NOTE(review): the four pushes leave %rsp 8 bytes off 16-byte alignment here - confirm the callee never needs an aligned stack
+#else
+ callq _sp_4096_cond_sub_64
+#endif /* __APPLE__ */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ repz retq
+#ifndef __APPLE__
+.size sp_4096_mont_reduce_64,.-sp_4096_mont_reduce_64
+#endif /* __APPLE__ */
+#ifdef HAVE_INTEL_AVX2
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * r A single precision integer.
+ * a A single precision integer.
+ * b A single precision digit.
+ */ +#ifndef __APPLE__ +.globl sp_4096_mul_d_avx2_64 +.type sp_4096_mul_d_avx2_64,@function +.align 16 +sp_4096_mul_d_avx2_64: +#else +.globl _sp_4096_mul_d_avx2_64 +.p2align 4 +_sp_4096_mul_d_avx2_64: +#endif /* __APPLE__ */ + movq %rdx, %rax + # A[0] * B + movq %rax, %rdx + xorq %r11, %r11 + mulxq (%rsi), %r9, %r10 + movq %r9, (%rdi) + # A[1] * B + mulxq 8(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 8(%rdi) + adoxq %r8, %r9 + # A[2] * B + mulxq 16(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 16(%rdi) + adoxq %r8, %r10 + # A[3] * B + mulxq 24(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 24(%rdi) + adoxq %r8, %r9 + # A[4] * B + mulxq 32(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 32(%rdi) + adoxq %r8, %r10 + # A[5] * B + mulxq 40(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 40(%rdi) + adoxq %r8, %r9 + # A[6] * B + mulxq 48(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 48(%rdi) + adoxq %r8, %r10 + # A[7] * B + mulxq 56(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 56(%rdi) + adoxq %r8, %r9 + # A[8] * B + mulxq 64(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 64(%rdi) + adoxq %r8, %r10 + # A[9] * B + mulxq 72(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 72(%rdi) + adoxq %r8, %r9 + # A[10] * B + mulxq 80(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 80(%rdi) + adoxq %r8, %r10 + # A[11] * B + mulxq 88(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 88(%rdi) + adoxq %r8, %r9 + # A[12] * B + mulxq 96(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 96(%rdi) + adoxq %r8, %r10 + # A[13] * B + mulxq 104(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 104(%rdi) + adoxq %r8, %r9 + # A[14] * B + mulxq 112(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 112(%rdi) + adoxq %r8, %r10 + # A[15] * B + mulxq 
120(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 120(%rdi) + adoxq %r8, %r9 + # A[16] * B + mulxq 128(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 128(%rdi) + adoxq %r8, %r10 + # A[17] * B + mulxq 136(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 136(%rdi) + adoxq %r8, %r9 + # A[18] * B + mulxq 144(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 144(%rdi) + adoxq %r8, %r10 + # A[19] * B + mulxq 152(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 152(%rdi) + adoxq %r8, %r9 + # A[20] * B + mulxq 160(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 160(%rdi) + adoxq %r8, %r10 + # A[21] * B + mulxq 168(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 168(%rdi) + adoxq %r8, %r9 + # A[22] * B + mulxq 176(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 176(%rdi) + adoxq %r8, %r10 + # A[23] * B + mulxq 184(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 184(%rdi) + adoxq %r8, %r9 + # A[24] * B + mulxq 192(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 192(%rdi) + adoxq %r8, %r10 + # A[25] * B + mulxq 200(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 200(%rdi) + adoxq %r8, %r9 + # A[26] * B + mulxq 208(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 208(%rdi) + adoxq %r8, %r10 + # A[27] * B + mulxq 216(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 216(%rdi) + adoxq %r8, %r9 + # A[28] * B + mulxq 224(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 224(%rdi) + adoxq %r8, %r10 + # A[29] * B + mulxq 232(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 232(%rdi) + adoxq %r8, %r9 + # A[30] * B + mulxq 240(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 240(%rdi) + adoxq %r8, %r10 + # A[31] * B + mulxq 248(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 248(%rdi) + adoxq %r8, 
%r9 + # A[32] * B + mulxq 256(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 256(%rdi) + adoxq %r8, %r10 + # A[33] * B + mulxq 264(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 264(%rdi) + adoxq %r8, %r9 + # A[34] * B + mulxq 272(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 272(%rdi) + adoxq %r8, %r10 + # A[35] * B + mulxq 280(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 280(%rdi) + adoxq %r8, %r9 + # A[36] * B + mulxq 288(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 288(%rdi) + adoxq %r8, %r10 + # A[37] * B + mulxq 296(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 296(%rdi) + adoxq %r8, %r9 + # A[38] * B + mulxq 304(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 304(%rdi) + adoxq %r8, %r10 + # A[39] * B + mulxq 312(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 312(%rdi) + adoxq %r8, %r9 + # A[40] * B + mulxq 320(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 320(%rdi) + adoxq %r8, %r10 + # A[41] * B + mulxq 328(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 328(%rdi) + adoxq %r8, %r9 + # A[42] * B + mulxq 336(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 336(%rdi) + adoxq %r8, %r10 + # A[43] * B + mulxq 344(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 344(%rdi) + adoxq %r8, %r9 + # A[44] * B + mulxq 352(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 352(%rdi) + adoxq %r8, %r10 + # A[45] * B + mulxq 360(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 360(%rdi) + adoxq %r8, %r9 + # A[46] * B + mulxq 368(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 368(%rdi) + adoxq %r8, %r10 + # A[47] * B + mulxq 376(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 376(%rdi) + adoxq %r8, %r9 + # A[48] * B + mulxq 384(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 
384(%rdi) + adoxq %r8, %r10 + # A[49] * B + mulxq 392(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 392(%rdi) + adoxq %r8, %r9 + # A[50] * B + mulxq 400(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 400(%rdi) + adoxq %r8, %r10 + # A[51] * B + mulxq 408(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 408(%rdi) + adoxq %r8, %r9 + # A[52] * B + mulxq 416(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 416(%rdi) + adoxq %r8, %r10 + # A[53] * B + mulxq 424(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 424(%rdi) + adoxq %r8, %r9 + # A[54] * B + mulxq 432(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 432(%rdi) + adoxq %r8, %r10 + # A[55] * B + mulxq 440(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 440(%rdi) + adoxq %r8, %r9 + # A[56] * B + mulxq 448(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 448(%rdi) + adoxq %r8, %r10 + # A[57] * B + mulxq 456(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 456(%rdi) + adoxq %r8, %r9 + # A[58] * B + mulxq 464(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 464(%rdi) + adoxq %r8, %r10 + # A[59] * B + mulxq 472(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 472(%rdi) + adoxq %r8, %r9 + # A[60] * B + mulxq 480(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 480(%rdi) + adoxq %r8, %r10 + # A[61] * B + mulxq 488(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + movq %r10, 488(%rdi) + adoxq %r8, %r9 + # A[62] * B + mulxq 496(%rsi), %rcx, %r8 + movq %r11, %r10 + adcxq %rcx, %r9 + movq %r9, 496(%rdi) + adoxq %r8, %r10 + # A[63] * B + mulxq 504(%rsi), %rcx, %r8 + movq %r11, %r9 + adcxq %rcx, %r10 + adoxq %r8, %r9 + adcxq %r11, %r9 + movq %r10, 504(%rdi) + movq %r9, 512(%rdi) + repz retq +#ifndef __APPLE__ +.size sp_4096_mul_d_avx2_64,.-sp_4096_mul_d_avx2_64 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* 
Compare a with b in constant time. + * + * a A single precision integer. + * b A single precision integer. + * return -ve, 0 or +ve if a is less than, equal to or greater than b + * respectively. + */ +#ifndef __APPLE__ +.globl sp_4096_cmp_64 +.type sp_4096_cmp_64,@function +.align 16 +sp_4096_cmp_64: +#else +.globl _sp_4096_cmp_64 +.p2align 4 +_sp_4096_cmp_64: +#endif /* __APPLE__ */ + xorq %rcx, %rcx + movq $-1, %rdx + movq $-1, %rax + movq $1, %r8 + movq 504(%rdi), %r9 + movq 504(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 496(%rdi), %r9 + movq 496(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 488(%rdi), %r9 + movq 488(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 480(%rdi), %r9 + movq 480(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 472(%rdi), %r9 + movq 472(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 464(%rdi), %r9 + movq 464(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 456(%rdi), %r9 + movq 456(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 448(%rdi), %r9 + movq 448(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 440(%rdi), %r9 + movq 440(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 432(%rdi), %r9 + movq 432(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + 
movq 424(%rdi), %r9 + movq 424(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 416(%rdi), %r9 + movq 416(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 408(%rdi), %r9 + movq 408(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 400(%rdi), %r9 + movq 400(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 392(%rdi), %r9 + movq 392(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 384(%rdi), %r9 + movq 384(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 376(%rdi), %r9 + movq 376(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 368(%rdi), %r9 + movq 368(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 360(%rdi), %r9 + movq 360(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 352(%rdi), %r9 + movq 352(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 344(%rdi), %r9 + movq 344(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 336(%rdi), %r9 + movq 336(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 328(%rdi), %r9 + movq 328(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx 
+ movq 320(%rdi), %r9 + movq 320(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 312(%rdi), %r9 + movq 312(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 304(%rdi), %r9 + movq 304(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 296(%rdi), %r9 + movq 296(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 288(%rdi), %r9 + movq 288(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 280(%rdi), %r9 + movq 280(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 272(%rdi), %r9 + movq 272(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 264(%rdi), %r9 + movq 264(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 256(%rdi), %r9 + movq 256(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 248(%rdi), %r9 + movq 248(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 240(%rdi), %r9 + movq 240(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 232(%rdi), %r9 + movq 232(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 224(%rdi), %r9 + movq 224(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, 
%rdx + movq 216(%rdi), %r9 + movq 216(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 208(%rdi), %r9 + movq 208(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 200(%rdi), %r9 + movq 200(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 192(%rdi), %r9 + movq 192(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 184(%rdi), %r9 + movq 184(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 176(%rdi), %r9 + movq 176(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 168(%rdi), %r9 + movq 168(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 160(%rdi), %r9 + movq 160(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 152(%rdi), %r9 + movq 152(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 144(%rdi), %r9 + movq 144(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 136(%rdi), %r9 + movq 136(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 128(%rdi), %r9 + movq 128(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 120(%rdi), %r9 + movq 120(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz 
%rcx, %rdx + movq 112(%rdi), %r9 + movq 112(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 104(%rdi), %r9 + movq 104(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 96(%rdi), %r9 + movq 96(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 88(%rdi), %r9 + movq 88(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 80(%rdi), %r9 + movq 80(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 72(%rdi), %r9 + movq 72(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 64(%rdi), %r9 + movq 64(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 56(%rdi), %r9 + movq 56(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 48(%rdi), %r9 + movq 48(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 40(%rdi), %r9 + movq 40(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 32(%rdi), %r9 + movq 32(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 24(%rdi), %r9 + movq 24(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 16(%rdi), %r9 + movq 16(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq 
8(%rdi), %r9 + movq 8(%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + movq (%rdi), %r9 + movq (%rsi), %r10 + andq %rdx, %r9 + andq %rdx, %r10 + subq %r10, %r9 + cmova %r8, %rax + cmovc %rdx, %rax + cmovnz %rcx, %rdx + xorq %rdx, %rax + repz retq +#ifndef __APPLE__ +.size sp_4096_cmp_64,.-sp_4096_cmp_64 +#endif /* __APPLE__ */ +#ifdef HAVE_INTEL_AVX2 +/* Reduce the number back to 4096 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +#ifndef __APPLE__ +.globl sp_4096_mont_reduce_avx2_64 +.type sp_4096_mont_reduce_avx2_64,@function +.align 16 +sp_4096_mont_reduce_avx2_64: +#else +.globl _sp_4096_mont_reduce_avx2_64 +.p2align 4 +_sp_4096_mont_reduce_avx2_64: +#endif /* __APPLE__ */ + push %r12 + push %r13 + push %r14 + movq %rdx, %rax + xorq %r14, %r14 + # i = 0 + movq $64, %r9 + movq (%rdi), %r13 + xorq %r12, %r12 +L_mont_loop_avx2_64: + # mu = a[i] * mp + movq %r13, %rdx + mulxq %rax, %rdx, %r8 + movq %r13, %r10 + # a[i+0] += m[0] * mu + mulxq (%rsi), %rcx, %r8 + movq 8(%rdi), %r13 + adcxq %rcx, %r10 + adoxq %r8, %r13 + # a[i+1] += m[1] * mu + mulxq 8(%rsi), %rcx, %r8 + movq 16(%rdi), %r10 + adcxq %rcx, %r13 + adoxq %r8, %r10 + # a[i+2] += m[2] * mu + mulxq 16(%rsi), %rcx, %r8 + movq 24(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 16(%rdi) + # a[i+3] += m[3] * mu + mulxq 24(%rsi), %rcx, %r8 + movq 32(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 24(%rdi) + # a[i+4] += m[4] * mu + mulxq 32(%rsi), %rcx, %r8 + movq 40(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 32(%rdi) + # a[i+5] += m[5] * mu + mulxq 40(%rsi), %rcx, %r8 + movq 48(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 40(%rdi) + # a[i+6] += m[6] * mu + mulxq 48(%rsi), %rcx, %r8 + movq 56(%rdi), %r11 + 
adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 48(%rdi) + # a[i+7] += m[7] * mu + mulxq 56(%rsi), %rcx, %r8 + movq 64(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 56(%rdi) + # a[i+8] += m[8] * mu + mulxq 64(%rsi), %rcx, %r8 + movq 72(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 64(%rdi) + # a[i+9] += m[9] * mu + mulxq 72(%rsi), %rcx, %r8 + movq 80(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 72(%rdi) + # a[i+10] += m[10] * mu + mulxq 80(%rsi), %rcx, %r8 + movq 88(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 80(%rdi) + # a[i+11] += m[11] * mu + mulxq 88(%rsi), %rcx, %r8 + movq 96(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 88(%rdi) + # a[i+12] += m[12] * mu + mulxq 96(%rsi), %rcx, %r8 + movq 104(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 96(%rdi) + # a[i+13] += m[13] * mu + mulxq 104(%rsi), %rcx, %r8 + movq 112(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 104(%rdi) + # a[i+14] += m[14] * mu + mulxq 112(%rsi), %rcx, %r8 + movq 120(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 112(%rdi) + # a[i+15] += m[15] * mu + mulxq 120(%rsi), %rcx, %r8 + movq 128(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 120(%rdi) + # a[i+16] += m[16] * mu + mulxq 128(%rsi), %rcx, %r8 + movq 136(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 128(%rdi) + # a[i+17] += m[17] * mu + mulxq 136(%rsi), %rcx, %r8 + movq 144(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 136(%rdi) + # a[i+18] += m[18] * mu + mulxq 144(%rsi), %rcx, %r8 + movq 152(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 144(%rdi) + # a[i+19] += m[19] * mu + mulxq 152(%rsi), %rcx, %r8 + movq 160(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 152(%rdi) + # a[i+20] += m[20] * mu + mulxq 160(%rsi), %rcx, %r8 + movq 168(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 160(%rdi) + # a[i+21] += m[21] * mu + mulxq 
168(%rsi), %rcx, %r8 + movq 176(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 168(%rdi) + # a[i+22] += m[22] * mu + mulxq 176(%rsi), %rcx, %r8 + movq 184(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 176(%rdi) + # a[i+23] += m[23] * mu + mulxq 184(%rsi), %rcx, %r8 + movq 192(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 184(%rdi) + # a[i+24] += m[24] * mu + mulxq 192(%rsi), %rcx, %r8 + movq 200(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 192(%rdi) + # a[i+25] += m[25] * mu + mulxq 200(%rsi), %rcx, %r8 + movq 208(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 200(%rdi) + # a[i+26] += m[26] * mu + mulxq 208(%rsi), %rcx, %r8 + movq 216(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 208(%rdi) + # a[i+27] += m[27] * mu + mulxq 216(%rsi), %rcx, %r8 + movq 224(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 216(%rdi) + # a[i+28] += m[28] * mu + mulxq 224(%rsi), %rcx, %r8 + movq 232(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 224(%rdi) + # a[i+29] += m[29] * mu + mulxq 232(%rsi), %rcx, %r8 + movq 240(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 232(%rdi) + # a[i+30] += m[30] * mu + mulxq 240(%rsi), %rcx, %r8 + movq 248(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 240(%rdi) + # a[i+31] += m[31] * mu + mulxq 248(%rsi), %rcx, %r8 + movq 256(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 248(%rdi) + # a[i+32] += m[32] * mu + mulxq 256(%rsi), %rcx, %r8 + movq 264(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 256(%rdi) + # a[i+33] += m[33] * mu + mulxq 264(%rsi), %rcx, %r8 + movq 272(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 264(%rdi) + # a[i+34] += m[34] * mu + mulxq 272(%rsi), %rcx, %r8 + movq 280(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 272(%rdi) + # a[i+35] += m[35] * mu + mulxq 280(%rsi), %rcx, %r8 + movq 288(%rdi), %r10 + adcxq %rcx, %r11 + 
adoxq %r8, %r10 + movq %r11, 280(%rdi) + # a[i+36] += m[36] * mu + mulxq 288(%rsi), %rcx, %r8 + movq 296(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 288(%rdi) + # a[i+37] += m[37] * mu + mulxq 296(%rsi), %rcx, %r8 + movq 304(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 296(%rdi) + # a[i+38] += m[38] * mu + mulxq 304(%rsi), %rcx, %r8 + movq 312(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 304(%rdi) + # a[i+39] += m[39] * mu + mulxq 312(%rsi), %rcx, %r8 + movq 320(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 312(%rdi) + # a[i+40] += m[40] * mu + mulxq 320(%rsi), %rcx, %r8 + movq 328(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 320(%rdi) + # a[i+41] += m[41] * mu + mulxq 328(%rsi), %rcx, %r8 + movq 336(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 328(%rdi) + # a[i+42] += m[42] * mu + mulxq 336(%rsi), %rcx, %r8 + movq 344(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 336(%rdi) + # a[i+43] += m[43] * mu + mulxq 344(%rsi), %rcx, %r8 + movq 352(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 344(%rdi) + # a[i+44] += m[44] * mu + mulxq 352(%rsi), %rcx, %r8 + movq 360(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 352(%rdi) + # a[i+45] += m[45] * mu + mulxq 360(%rsi), %rcx, %r8 + movq 368(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 360(%rdi) + # a[i+46] += m[46] * mu + mulxq 368(%rsi), %rcx, %r8 + movq 376(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 368(%rdi) + # a[i+47] += m[47] * mu + mulxq 376(%rsi), %rcx, %r8 + movq 384(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 376(%rdi) + # a[i+48] += m[48] * mu + mulxq 384(%rsi), %rcx, %r8 + movq 392(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 384(%rdi) + # a[i+49] += m[49] * mu + mulxq 392(%rsi), %rcx, %r8 + movq 400(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 392(%rdi) + # a[i+50] += m[50] * mu + 
mulxq 400(%rsi), %rcx, %r8 + movq 408(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 400(%rdi) + # a[i+51] += m[51] * mu + mulxq 408(%rsi), %rcx, %r8 + movq 416(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 408(%rdi) + # a[i+52] += m[52] * mu + mulxq 416(%rsi), %rcx, %r8 + movq 424(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 416(%rdi) + # a[i+53] += m[53] * mu + mulxq 424(%rsi), %rcx, %r8 + movq 432(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 424(%rdi) + # a[i+54] += m[54] * mu + mulxq 432(%rsi), %rcx, %r8 + movq 440(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 432(%rdi) + # a[i+55] += m[55] * mu + mulxq 440(%rsi), %rcx, %r8 + movq 448(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 440(%rdi) + # a[i+56] += m[56] * mu + mulxq 448(%rsi), %rcx, %r8 + movq 456(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 448(%rdi) + # a[i+57] += m[57] * mu + mulxq 456(%rsi), %rcx, %r8 + movq 464(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 456(%rdi) + # a[i+58] += m[58] * mu + mulxq 464(%rsi), %rcx, %r8 + movq 472(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 464(%rdi) + # a[i+59] += m[59] * mu + mulxq 472(%rsi), %rcx, %r8 + movq 480(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 472(%rdi) + # a[i+60] += m[60] * mu + mulxq 480(%rsi), %rcx, %r8 + movq 488(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 480(%rdi) + # a[i+61] += m[61] * mu + mulxq 488(%rsi), %rcx, %r8 + movq 496(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 488(%rdi) + # a[i+62] += m[62] * mu + mulxq 496(%rsi), %rcx, %r8 + movq 504(%rdi), %r11 + adcxq %rcx, %r10 + adoxq %r8, %r11 + movq %r10, 496(%rdi) + # a[i+63] += m[63] * mu + mulxq 504(%rsi), %rcx, %r8 + movq 512(%rdi), %r10 + adcxq %rcx, %r11 + adoxq %r8, %r10 + movq %r11, 504(%rdi) + adcxq %r14, %r10 + movq %r12, %r14 + adoxq %r12, %r14 + adcxq %r12, %r14 + movq %r10, 
512(%rdi) + # i += 1 + addq $8, %rdi + decq %r9 + jnz L_mont_loop_avx2_64 + movq %r13, (%rdi) + negq %r14 + movq %r14, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + movq %rdi, %rdi + subq $512, %rdi +#ifndef __APPLE__ + callq sp_4096_cond_sub_64@plt +#else + callq _sp_4096_cond_sub_64 +#endif /* __APPLE__ */ + pop %r14 + pop %r13 + pop %r12 + repz retq +#ifndef __APPLE__ +.size sp_4096_mont_reduce_avx2_64,.-sp_4096_mont_reduce_avx2_64 +#endif /* __APPLE__ */ +#endif /* HAVE_INTEL_AVX2 */ +/* Shift number left by n bits. (r = a << n) + * + * r Result of left shift by n. + * a Number to shift. + * n Amount to shift. + */ +#ifndef __APPLE__ +.globl sp_4096_lshift_64 +.type sp_4096_lshift_64,@function +.align 16 +sp_4096_lshift_64: +#else +.globl _sp_4096_lshift_64 +.p2align 4 +_sp_4096_lshift_64: +#endif /* __APPLE__ */ + movq %rdx, %rcx + movq $0, %r10 + movq 472(%rsi), %r11 + movq 480(%rsi), %rdx + movq 488(%rsi), %rax + movq 496(%rsi), %r8 + movq 504(%rsi), %r9 + shldq %cl, %r9, %r10 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 480(%rdi) + movq %rax, 488(%rdi) + movq %r8, 496(%rdi) + movq %r9, 504(%rdi) + movq %r10, 512(%rdi) + movq 440(%rsi), %r9 + movq 448(%rsi), %rdx + movq 456(%rsi), %rax + movq 464(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 448(%rdi) + movq %rax, 456(%rdi) + movq %r8, 464(%rdi) + movq %r11, 472(%rdi) + movq 408(%rsi), %r11 + movq 416(%rsi), %rdx + movq 424(%rsi), %rax + movq 432(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 416(%rdi) + movq %rax, 424(%rdi) + movq %r8, 432(%rdi) + movq %r9, 440(%rdi) + movq 376(%rsi), %r9 + movq 384(%rsi), %rdx + movq 392(%rsi), %rax + movq 400(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 384(%rdi) + movq %rax, 392(%rdi) + movq %r8, 400(%rdi)
+ movq %r11, 408(%rdi) + movq 344(%rsi), %r11 + movq 352(%rsi), %rdx + movq 360(%rsi), %rax + movq 368(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 352(%rdi) + movq %rax, 360(%rdi) + movq %r8, 368(%rdi) + movq %r9, 376(%rdi) + movq 312(%rsi), %r9 + movq 320(%rsi), %rdx + movq 328(%rsi), %rax + movq 336(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 320(%rdi) + movq %rax, 328(%rdi) + movq %r8, 336(%rdi) + movq %r11, 344(%rdi) + movq 280(%rsi), %r11 + movq 288(%rsi), %rdx + movq 296(%rsi), %rax + movq 304(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 288(%rdi) + movq %rax, 296(%rdi) + movq %r8, 304(%rdi) + movq %r9, 312(%rdi) + movq 248(%rsi), %r9 + movq 256(%rsi), %rdx + movq 264(%rsi), %rax + movq 272(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 256(%rdi) + movq %rax, 264(%rdi) + movq %r8, 272(%rdi) + movq %r11, 280(%rdi) + movq 216(%rsi), %r11 + movq 224(%rsi), %rdx + movq 232(%rsi), %rax + movq 240(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 224(%rdi) + movq %rax, 232(%rdi) + movq %r8, 240(%rdi) + movq %r9, 248(%rdi) + movq 184(%rsi), %r9 + movq 192(%rsi), %rdx + movq 200(%rsi), %rax + movq 208(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 192(%rdi) + movq %rax, 200(%rdi) + movq %r8, 208(%rdi) + movq %r11, 216(%rdi) + movq 152(%rsi), %r11 + movq 160(%rsi), %rdx + movq 168(%rsi), %rax + movq 176(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 160(%rdi) + movq %rax, 168(%rdi) + movq %r8, 176(%rdi) + movq %r9, 184(%rdi) + movq 120(%rsi), %r9 + movq 128(%rsi), %rdx + movq 
136(%rsi), %rax + movq 144(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 128(%rdi) + movq %rax, 136(%rdi) + movq %r8, 144(%rdi) + movq %r11, 152(%rdi) + movq 88(%rsi), %r11 + movq 96(%rsi), %rdx + movq 104(%rsi), %rax + movq 112(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 96(%rdi) + movq %rax, 104(%rdi) + movq %r8, 112(%rdi) + movq %r9, 120(%rdi) + movq 56(%rsi), %r9 + movq 64(%rsi), %rdx + movq 72(%rsi), %rax + movq 80(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r9, %rdx + movq %rdx, 64(%rdi) + movq %rax, 72(%rdi) + movq %r8, 80(%rdi) + movq %r11, 88(%rdi) + movq 24(%rsi), %r11 + movq 32(%rsi), %rdx + movq 40(%rsi), %rax + movq 48(%rsi), %r8 + shldq %cl, %r8, %r9 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shldq %cl, %r11, %rdx + movq %rdx, 32(%rdi) + movq %rax, 40(%rdi) + movq %r8, 48(%rdi) + movq %r9, 56(%rdi) + movq (%rsi), %rdx + movq 8(%rsi), %rax + movq 16(%rsi), %r8 + shldq %cl, %r8, %r11 + shldq %cl, %rax, %r8 + shldq %cl, %rdx, %rax + shlq %cl, %rdx + movq %rdx, (%rdi) + movq %rax, 8(%rdi) + movq %r8, 16(%rdi) + movq %r11, 24(%rdi) + repz retq +#endif /* WOLFSSL_SP_4096 */ /* Conditionally copy a into r using the mask m. * m is -1 to copy and 0 when not. 
* diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index f0fb014ac..a5c409f47 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -78,6 +78,8 @@ WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); #ifdef __cplusplus } /* extern "C" */ #endif @@ -3949,6 +3951,11 @@ static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result, sp_ModExp_3072(b, r, a, y); else #endif +#ifdef WOLFSSL_SP_4096 + if (fp_count_bits(a) == 4096) + sp_ModExp_4096(b, r, a, y); + else +#endif #endif fp_exptmod(b, r, a, y); diff --git a/wolfssl/wolfcrypt/sp.h b/wolfssl/wolfcrypt/sp.h index ed1db955c..ff79a1fac 100644 --- a/wolfssl/wolfcrypt/sp.h +++ b/wolfssl/wolfcrypt/sp.h @@ -62,6 +62,12 @@ WOLFSSL_LOCAL int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm, byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_RsaPublic_4096(const byte* in, word32 inLen, + mp_int* em, mp_int* mm, byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_RsaPrivate_4096(const byte* in, word32 inLen, + mp_int* dm, mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, + mp_int* mm, byte* out, word32* outLen); + #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA) @@ -74,6 +80,8 @@ WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res); +WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, + mp_int* res); #endif @@ -83,6 +91,8 @@ WOLFSSL_LOCAL int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, mp_int* mod, byte* out, word32* outLen); WOLFSSL_LOCAL int sp_DhExp_3072(mp_int* base, const byte* exp, word32 
expLen, mp_int* mod, byte* out, word32* outLen); +WOLFSSL_LOCAL int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen, + mp_int* mod, byte* out, word32* outLen); #endif /* WOLFSSL_HAVE_SP_DH */ diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index ed8e0fe1c..6215c072f 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -105,18 +105,24 @@ #else #define SP_INT_DIGITS ((256 + SP_WORD_SIZE) / SP_WORD_SIZE) #endif -#elif defined(WOLFSSL_SP_NO_3072) +#elif defined(WOLFSSL_SP_4096) #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) - #define SP_INT_DIGITS ((4096 + SP_WORD_SIZE) / SP_WORD_SIZE) + #define SP_INT_DIGITS ((8192 + SP_WORD_SIZE) / SP_WORD_SIZE) #else - #define SP_INT_DIGITS ((2048 + SP_WORD_SIZE) / SP_WORD_SIZE) + #define SP_INT_DIGITS ((4096 + SP_WORD_SIZE) / SP_WORD_SIZE) #endif -#else +#elif !defined(WOLFSSL_SP_NO_3072) #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) #define SP_INT_DIGITS ((6144 + SP_WORD_SIZE) / SP_WORD_SIZE) #else #define SP_INT_DIGITS ((3072 + SP_WORD_SIZE) / SP_WORD_SIZE) #endif +#else + #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) + #define SP_INT_DIGITS ((4096 + SP_WORD_SIZE) / SP_WORD_SIZE) + #else + #define SP_INT_DIGITS ((2048 + SP_WORD_SIZE) / SP_WORD_SIZE) + #endif #endif #define sp_isodd(a) ((a)->used != 0 && ((a)->dp[0] & 1))