diff --git a/c/exp2.hpp b/c/exp2.hpp index c2ef45b..635cc65 100644 --- a/c/exp2.hpp +++ b/c/exp2.hpp @@ -24,5 +24,5 @@ void exp(Field::Element& res, Field::Element& base, uint8_t* scalar, unsigned in i--; } - delete naf; -} \ No newline at end of file + delete [] naf; +} diff --git a/src/armbuilder.js b/src/armbuilder.js new file mode 100644 index 0000000..57ad918 --- /dev/null +++ b/src/armbuilder.js @@ -0,0 +1,1675 @@ +const bigInt = require("big-integer"); +const assert = require("assert"); + +module.exports.genFuncs = genFuncs + +class Reg { + constructor(number) { + this.number = number; + } + valueOf() { return this.number; } + toString() { + if (this.number === 31) + return "xzr"; + else + return `x${this.number}`; + } +} + +class RegVar extends Array { + constructor(...args) { + super(...args.map(x => new Reg(x))); + this.cur = 0; + } + + toString() { + return this.join(", "); + } + + getNext() { + console.assert(this.length, "RegVar is empty, cannot get next Reg."); + + let reg = this[this.cur]; + this.cur = (this.cur + 1) % this.length; + + return reg; + } + + rewind() { + this.cur = 0; + } +} + +function removeSlice(regVar, start, number) { + let piece = regVar.slice(); + + piece.splice(start, number); + return piece; +} + +function cyclicCopy(regVar, width) { + let regVarCopy = regVar.slice(); + + let newVar = []; + for(let i = 0; i < width; i++) { + newVar.push(regVarCopy.getNext()); + } + return newVar; +} + +function expand(regVar, newReg, width) +{ + let newVar = regVar.slice(); + while(newVar.length < width) { + newVar.push(new Reg(newReg)); + } + return newVar; +} + +class RegPool { + constructor(numParamRegs) { + this.numParamRegs = numParamRegs; + this.m_maxAvailReg = 29; + this.regPool = []; + this.savedRegs = []; + + for (let reg = numParamRegs; reg <= this.m_maxAvailReg; reg++) { + if (reg !== 18) { + this.regPool.push(new Reg(reg)); + } + } + } + + assignRegs(numRegs) { + let regs = new RegVar(); + + for(let i = 0; i < numRegs; i++) { + 
console.assert(this.regPool.length, "RegPool is empty, cannot assign Reg."); + + const reg = this.regPool.shift(); + regs.push(reg); + + if (reg >= 19) { + this.savedRegs.push(reg); + } + } + return regs; + } + + releaseRegs(regs) { + this.regPool.push(...regs); + } + + hasSavedRegs() { + return this.savedRegs.length > 0; + } + + getSavedRegs() { + return this.savedRegs.slice(); + } +} + +class GenBase { + constructor(width, space, name, numParamRegs, numWorkRegs) { + this.code = []; + this.Indent = " "; + this.width = width; + this.space = space; + this.name = name; + this.regPool = new RegPool(numParamRegs); + this.workRegs = this.regPool.assignRegs(numWorkRegs); + + assert(width > 0); + + this.op_func_name(); + } + + toString() { + return this.getCode(); + } + + getCode() { + return this.code.join("\n")+"\n"; + } + + add_line(...line) { + this.code.push(line.join("")); + } + + op(instrName, ...args) { + let instr = this.Indent; + + if (args.length) { + let operands = args.map(s => s.toString().padStart(3)).join(", "); + instr += `${instrName.padEnd(5)} ${operands}`; + } else { + instr += `${instrName}`; + } + this.add_line(instr); + } + + op_func_name() { + this.add_line(`${this.space}_${this.name}:`); + this.add_line(`_${this.space}_${this.name}:`); + } + + op_label(label) { + this.add_line(`${label}:`); + } + + op_comment(...comment) { + this.add_line(this.Indent + "// ", ...comment); + } + + op_debug(...str) { + this.op_comment(...str); + } + + op_empty() { + this.add_line(""); + } + + makeLabel(label_name) { + return this.space + "_" + this.name + "_" + label_name; + } + + getVarName(name) { + return this.space + "_" + name; + } + + getMem(regNum, offset) { + if (offset === 0) { + return "[x" + regNum + "]"; + } + return "[x" + regNum + ", " + offset + "]"; + } + + getMemWord(regNum, wordNum) { + return this.getMem(regNum, wordNum * 8); + } + + hasSavedRegs() { + return this.regPool.hasSavedRegs(); + } + + pushRegs() { + let saved = 
this.regPool.getSavedRegs(); + let size = saved.length; + + for (let i = 0; i 0; i -= 2) { + this.op("ldp", saved[i-2], saved[i-1], "[sp], #16"); + } + } + + assignRegs(regs, numRegs) { + return this.regPool.assignRegs(regs, numRegs); + } + + releaseRegs(regs) { + this.regPool.releaseRegs(regs); + } + + getNextWorkReg() { + return this.workRegs.getNext(); + } + + genStoreWord(regVar, i, memArg) + { + if (i % 2 == 0) { + if (i === regVar.length - 1) { + this.op("str", regVar[i], memArg); + } else { + this.op("stp", regVar[i], regVar[i+1], memArg); + } + } + } + + genLoadWord(regVar, i, memArg) + { + if (i % 2 == 0) { + if (i === regVar.length - 1) { + this.op("ldr", regVar[i], memArg); + } else { + this.op("ldp", regVar[i], regVar[i+1], memArg); + } + } + } + + genAddReg(r, a, b, i) { + this.op(i ? "adcs" : "adds", r, a, b); + } + + genAddWord(r, a, b, i) { + this.genAddReg(r[i], a[i], b[i], i); + } + + genAdd(r, a, b) { + for (let i = 0; i < this.width; i++) { + this.genAddWord(r, a, b, i); + } + } + + genSubReg(r, a, b, i) { + this.op(i ? 
"sbcs" : "subs", r, a, b); + } + + genSubWord(r, a, b, i) { + this.genSubReg(r[i], a[i], b[i], i); + } + + genSub(r, a, b) { + for (let i = 0; i < this.width; i++) { + this.genSubWord(r, a, b, i); + } + } + + genSubLoad(r, a, b, memReg) { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(b, i, memReg); + this.genSubWord(r, a, b, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genCselStore(r, a, b, memReg) + { + let j = 0; + for (let i = 0; i < this.width; i++) { + this.genCselWord(r, a, b, i); + if (i > 0) + this.genStoreVarWord(r, j++, memReg); + if (i % 2) this.op_empty(); + } + this.genStoreVarWord(r, j, memReg); + } + + genCselWord(r, a, b, i) { + this.op("csel", r[i], a[i], b[i], "hs"); + } + + genStoreVarWord(regVar, i, memReg) { + this.genStoreWord(regVar, i, this.getMemWord(memReg, i)); + } + + genLoadVarWord(regVar, i, memReg) { + this.genLoadWord(regVar, i, this.getMemWord(memReg, i)); + } + + genLoadVar(regVar, memReg) { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(regVar, i, memReg); + } + } + + genStoreVar(regVar, memReg) { + for (let i = 0; i < this.width; i++) { + this.genStoreVarWord(regVar, i, memReg); + } + } + + genLboMask(lastWord, maskReg) { + this.op("adr", maskReg, this.getVarName("lboMask")); + this.op("ldr", maskReg, this.getMem(maskReg, 0)); + this.op("and", lastWord, lastWord, maskReg); + this.op_empty(); + } +} + +class Gen_rawIsZero extends GenBase { + constructor(width, space) { + super(width, space, "rawIsZero", 1, 16) + this.generate(); + } + + generate() { + this.genFold(); + + this.op("cmp", this.accum, "xzr"); + this.op("cset", "x0", "eq"); + this.op("ret"); + } + + genFold() { + const chunkCount = Math.trunc(this.width / 4); + const lastWidth = this.width % 4; + + let i = 0; + for (; i < chunkCount; i += 1) { + this.genChunk(4, i); + } + + if (lastWidth) { + this.genChunk(lastWidth, i); + } + } + + genChunk(width, i) { + let r = this.genStep(width, i * 4); + + if 
(this.accum === undefined) { + this.accum = r; + } else { + this.op("orr", "x17", this.accum, r); + + this.accum = "x17"; + } + + this.op_empty(); + } + + genStep(width, i) { + assert(width > 0); + + if (width === 1) { + const r = this.getNextWorkReg(); + + this.op("ldr", r, this.getMemWord(0, i)); + + return r; + + } else if (width === 2) { + const a = this.getNextWorkReg(); + const b = this.getNextWorkReg(); + const r = this.getNextWorkReg(); + + this.op("ldp", a, b, this.getMemWord(0, i)); + this.op("orr", r, a, b); + + return r; + + } else { + const a = this.genStep(2, i); + this.op_empty(); + const b = this.genStep(width - 2, i + 2); + const r = this.getNextWorkReg(); + + this.op("orr", r, a, b); + + return r; + } + } +} + +class Gen_rawIsEq extends GenBase { + constructor(width, space) { + super(width, space, "rawIsEq", 2, 3); + + this.a1 = cyclicCopy(this.assignRegs(4), width); + this.b1 = cyclicCopy(this.assignRegs(4), width); + this.c1 = cyclicCopy(this.assignRegs(4), width); + + this.generate(); + } + + generate() { + if (this.longNumber()) { + this.accum = "x17"; + + this.op("mov", "x17", "xzr"); + this.op_empty(); + } + + this.genFold(); + + this.op("cmp", this.accum, "xzr"); + this.op("cset", "x0", "eq"); + this.op("ret"); + } + + genFold() { + const chunkCount = Math.trunc(this.width / 4); + const lastWidth = this.width % 4; + + let i = 0; + for (; i < chunkCount; i += 1) { + this.genChunk(4, i); + } + + if (lastWidth) { + this.genChunk(lastWidth, i); + } + } + + genChunk(width, i) { + let r = this.genStep(width, i * 4); + + if (this.longNumber()) { + this.op("orr", "x17", "x17", r); + } else { + if (this.accum === undefined) { + this.accum = r; + } else { + this.op("orr", "x17", this.accum, r); + + this.accum = "x17"; + } + } + this.op_empty(); + } + + genStep(width, i) { + assert(width > 0); + + if (width === 1) { + const r = this.getNextWorkReg(); + + this.op("ldr", this.a1[i], this.getMemWord(0, i)); + this.op("ldr", this.b1[i], this.getMemWord(1, 
i)); + this.op("eor", r, this.a1[i], this.b1[i]); + + return r; + } else if (width === 2) { + const r = this.getNextWorkReg(); + + this.op("ldp", this.a1[i], this.a1[i+1], this.getMemWord(0, i)); + this.op("ldp", this.b1[i], this.b1[i+1], this.getMemWord(1, i)); + this.op("eor", this.c1[i], this.a1[i], this.b1[i]); + this.op("eor", this.c1[i+1], this.a1[i+1], this.b1[i+1]); + this.op("orr", r, this.c1[i], this.c1[i+1]); + + return r; + } else { + const a = this.genStep(2, i); + this.op_empty(); + const b = this.genStep(width - 2, i + 2); + const r = this.getNextWorkReg(); + + this.op("orr", r, a, b); + + return r; + } + } + + longNumber() { + return (this.width >= 8); + } +} + +class Gen_rawSwap extends GenBase { + constructor(width, space) { + super(width, space, "rawSwap", 2, 0); + + this.a1 = cyclicCopy(this.assignRegs(8), width); + this.b1 = cyclicCopy(this.assignRegs(8), width); + + this.generate(); + } + + generate() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.a1, i, 0); + this.genLoadVarWord(this.b1, i, 1); + this.genStoreVarWord(this.a1, i, 1); + this.genStoreVarWord(this.b1, i, 0); + if (i % 2) this.op_empty(); + } + this.op("ret"); + } +} + +class Gen_rawCopy extends GenBase { + constructor(width, space) { + super(width, space, "rawCopy", 2, 0); + + this.a1 = cyclicCopy(this.assignRegs(8), this.width); + + this.generate(); + } + + generate() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.a1, i, 1); + this.genStoreVarWord(this.a1, i, 0); + if (i % 2) this.op_empty(); + } + this.op("ret"); + } +} + +class Gen_rawCopyS2L extends GenBase { + constructor(width, space) { + super(width, space, "rawCopyS2L", 4, 0); + + this.q1 = cyclicCopy(this.assignRegs(6), this.width); + this.r1 = cyclicCopy(this.assignRegs(6), this.width); + + this.generate(); + } + + generate() { + let label = this.makeLabel("adjust_neg"); + + this.op("cmp", "x1", "xzr"); + this.op("b.lt", label); + this.op_empty(); + + let a1 = expand(new 
RegVar(1), 31, this.width); + this.genStoreVar(a1, 0); + this.op("ret"); + this.op_empty(); + + this.op_label(label); + this.genPositive(); + this.op("ret"); + } + + genPositive() { + if (this.width > 1) { + this.op("mov", "x2", "-1"); + } + + this.op("adr", "x3", this.getVarName("rawq")); + this.op_empty(); + + let a1 = expand(new RegVar(1), 2, this.width); + + let j = 0; + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.q1, i, 3); + this.genAddWord(this.r1, a1, this.q1, i); + if (i > 0) + this.genStoreVarWord(this.r1, j++, 0); + if (i % 2) this.op_empty(); + } + this.genStoreVarWord(this.r1, j, 0); + if (this.width % 2) this.op_empty(); + } +} + +class Gen_rawCmp extends GenBase { + constructor(width, space) { + super(width, space, "rawCmp", 3, 0); + + this.a1 = cyclicCopy(this.assignRegs(4), this.width); + this.b1 = cyclicCopy(this.assignRegs(4), this.width); + + this.generate(); + } + + generate() { + this.genCmp(); + this.op("cneg", "x0", "x2", "lo"); + this.op("ret"); + } + + + genCmp() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.a1, i, 0); + this.genLoadVarWord(this.b1, i, 1); + this.genSubWord(this.a1, this.a1, this.b1, i); + this.genAccum(i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genAccum(i) { + if (i === 0) { + this.op("cset", "x2", "ne"); + } else { + this.op("cinc", "x2", "x2", "ne"); + } + } +} + +class Gen_rawBinOp extends GenBase { + constructor(width, space, name, binOp) { + super(width, space, name, 4, 4); + + assert(width <= 12); + + this.binOp = binOp; + this.unary = this. 
isUnary(binOp); + + this.r1 = this.assignRegs(this.width); + this.b1 = cyclicCopy(this.workRegs, this.width); + this.releaseRegs(this.workRegs); + this.r2 = this.assignRegs(this.width); + this.generate(); + } + + generate() { + this.pushRegs(); + + this.genBinaryOp(); + + this.op("adr", "x3", this.getVarName("rawq")); + this.genSubLoad(this.r2, this.r1, this.r2, 3); + this.genCselStore(this.r1, this.r2, this.r1, 0); + + this.popRegs(); + this.op("ret"); + } + + + genBinaryOp() { + if (this.unary) + this.genBinaryOp1(); + else + this.genBinaryOp2(); + + this.genLboMask(this.r1[this.width-1], new Reg(2)); + } + + genBinaryOp1() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + this.op(this.binOp, this.r1[i], this.r1[i]); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genBinaryOp2() { + + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + this.genLoadVarWord(this.b1, i, 2); + this.op(this.binOp, this.r1[i], this.r1[i], this.b1[i]); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + isUnary(binOp) { + return (binOp === "mvn"); + } +} + +class Gen_rawAdd extends GenBase { + constructor(width, space, name, shortOper2) { + super(width, space, name, 4, 4); + + assert(width <= 12); + + this.shortOper2 = shortOper2; + + this.r1 = this.assignRegs(this.width); + this.b1 = cyclicCopy(this.workRegs, this.width); + this.releaseRegs(this.workRegs); + this.r2 = this.assignRegs(this.width); + this.generate(); + } + + generate() { + this.pushRegs(); + + if (this.shortOper2) + this.genAdditionOpLS(); + else + this.genAdditionOp(); + + this.op("cset", "x2", "cs"); + this.op_empty(); + + this.genSubtraction(); + this.genResultStore(); + + this.op_empty(); + this.op_label(this.makeLabel("out")); + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + + genAdditionOp() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + 
this.genLoadVarWord(this.b1, i, 2); + this.genAddWord(this.r1, this.r1, this.b1, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genAdditionOpLS() { + let b = expand(new RegVar(2), 31, this.width); + + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + this.genAddWord(this.r1, this.r1, b, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genSubtraction() { + this.op("adr", "x3", this.getVarName("rawq")); + this.genSubLoad(this.r2, this.r1, this.r2, 3); + } + + genResultStore() { + const label = this.makeLabel("done_s"); + + this.op("cbnz", "x2", label); + this.op("b.hs", label); + this.op_empty(); + + this.genStoreVar(this.r1, 0); + this.op_empty(); + this.op("b", this.makeLabel("out")); + this.op_empty(); + this.op_label(label); + this.genStoreVar(this.r2, 0); + } +} + +class Gen_rawSub extends GenBase { + constructor(width, space, name, paramsType) { + super(width, space, name, 4, 4); + + assert(width <= 20); + + this.paramsType = paramsType; + + this.r1 = this.assignRegs(this.width); + this.b1 = cyclicCopy(this.workRegs, this.width); + this.releaseRegs(this.workRegs); + this.r2 = this.assignRegs(this.width); + this.generate(); + } + + generate() { + this.pushRegs(); + + if (this.paramsType === "LS") + this.genSubtractionOpLS(); + else if (this.paramsType === "SL") + this.genSubtractionOpSL(); + else + this.genSubtractionOp(); + + const doneLabel = this.makeLabel("done"); + this.op("b.cs", doneLabel); + this.op_empty(); + + this.genAddition(); + + this.op_label(doneLabel); + this.genStoreVar(this.r1, 0); + + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + + genSubtractionOp() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + this.genLoadVarWord(this.b1, i, 2); + this.genSubWord(this.r1, this.r1, this.b1, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genSubtractionOpLS() { + let b = 
expand(new RegVar(2), 31, this.width); + + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 1); + this.genSubWord(this.r1, this.r1, b, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } + + genSubtractionOpSL() { + let a = expand(new RegVar(1), 31, this.width); + this.genSubLoad(this.r1, a, this.r1, 2); + } + + genAddition() { + this.op("adr", "x3", this.getVarName("rawq")); + + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.b1, i, 3); + this.genAddWord(this.r1, this.r1, this.b1, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + } +} + +class Gen_rawSubRegular extends GenBase { + constructor(width, space, name) { + super(width, space, name, 4, 0); + + this.a1 = cyclicCopy(this.assignRegs(4), this.width); + this.b1 = cyclicCopy(this.assignRegs(4), this.width); + + this.generate(); + } + + generate() { + let j = 0; + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.a1, i, 1); + this.genLoadVarWord(this.b1, i, 2); + this.genSubWord(this.a1, this.a1, this.b1, i); + if (i > 0) + this.genStoreVarWord(this.a1, j++, 0); + if (i % 2) this.op_empty(); + } + + this.genStoreVarWord(this.a1, j, 0); + if (this.width % 2) this.op_empty(); + + this.op("ret"); + this.op_empty(); + } +} + +class Gen_rawNeg extends GenBase { + constructor(width, space, name) { + super(width, space, name, 4, 4); + + assert(width <= 20); + + this.r1 = this.assignRegs(this.width); + this.b1 = cyclicCopy(this.workRegs, this.width); + + this.generate(); + } + + generate() { + this.pushRegs(); + + this.genIsZero(); + + const doneLabel = this.makeLabel("done_zero"); + this.op("cbz", "x2", doneLabel); + this.op_empty(); + + this.genSubtraction(); + + if (this.hasSavedRegs()) + this.op("b", this.makeLabel("out")); + else + this.op("ret"); + this.op_empty(); + + this.op_label(doneLabel); + this.genZeroStore(); + this.op_empty(); + + if (this.hasSavedRegs()) + 
this.op_label(this.makeLabel("out")); + + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + + genIsZero() { + this.op("mov", "x2", "xzr"); + + let i = 0; + for (; i < this.width - 1; i += 2) { + let r = this.getNextWorkReg(); + + this.op("ldp", this.r1[i], this.r1[i+1], this.getMemWord(1, i)); + this.op("orr", r, this.r1[i], this.r1[i+1]); + this.op("orr", "x2", "x2", r); + this.op_empty(); + } + + if (this.width % 2) { + this.op("ldr", this.r1[i], this.getMemWord(1, i)); + this.op("orr", "x2", "x2", this.r1[i]); + this.op_empty(); + } + } + + genSubtraction() { + this.op("adr", "x3", this.getVarName("rawq")); + + let j = 0; + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.b1, i, 3); + this.genSubWord(this.r1, this.b1, this.r1, i); + if (i > 0) + this.genStoreVarWord(this.r1, j++, 0); + if (i % 2) this.op_empty(); + } + this.genStoreVarWord(this.r1, j, 0); + if (this.width % 2) this.op_empty(); + } + + genZeroStore() { + let zero = cyclicCopy(new RegVar(31), this.width); + this.genStoreVar(zero, 0); + } +} + +class Gen_rawNegLS extends GenBase { + constructor(width, space, name) { + super(width, space, name, 4, 4); + + assert(width <= 12); + + this.r1 = this.assignRegs(this.width); + this.a2 = cyclicCopy(this.workRegs, this.width); + this.releaseRegs(this.workRegs); + this.r2 = this.assignRegs(this.width); + this.generate(); + } + + generate() { + this.pushRegs(); + + this.genSubtractionOpQ(); + this.op_empty(); + this.genSubtractionOpA(); + this.op_empty(); + + this.genAdd(this.r2, this.r2, this.r1); + this.op_empty(); + + this.op_label(this.makeLabel("done")); + this.genStoreVar(this.r2, 0); + + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + + genSubtractionOpQ() { + this.op("adr", "x3", this.getVarName("rawq")); + + let c = expand(new RegVar(2), 31, this.width); + + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.r1, i, 3); + this.genSubWord(this.r2, this.r1, c, i); + if (i % 2) this.op_empty(); + } + 
if (this.width % 2) this.op_empty(); + + this.op("cset", "x2", "cs"); + } + + genSubtractionOpA() { + for (let i = 0; i < this.width; i++) { + this.genLoadVarWord(this.a2, i, 1); + this.genSubWord(this.r2, this.r2, this.a2, i); + if (i % 2) this.op_empty(); + } + if (this.width % 2) this.op_empty(); + + this.op("cset", "x3", "cs"); + this.op("orr", "x3", "x3", "x2"); + this.op_empty(); + this.op("cbz", "x3", this.makeLabel("done")); + } +} + +class Gen_rawShr extends GenBase { + constructor(width, space, name) { + super(width, space, name, 6, 2); + + assert(width <= 12); + + this.r1 = this.assignRegs(this.width); + this.releaseRegs(this.workRegs); + this.c1 = cyclicCopy(this.workRegs, this.width); + + this.generate(); + } + + generate() { + if (this.width === 1) { + this.op("ldr", "x3", "[x1]"); + this.op("lsr", "x3", "x3", "x2"); + this.op("str", "x3", "[x0]"); + this.op("ret"); + return; + } + + this.pushRegs(); + this.genLoadVar(this.r1, 1); + this.op_empty(); + + this.genCalcShift(); + this.genCalcJump(); + + this.genShift(); + + if (this.hasSavedRegs()) { + this.op_label(this.makeLabel("done")); + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + if (this.width > 2) this.genJumpTable(); + this.op_empty(); + } + + + genCalcShift() { + this.op("and", "x3", "x2", "0x3f"); + this.op("mov", "x4", "0x3f"); + this.op("sub", "x4", "x4", "x3"); + this.op_empty(); + } + + genCalcJump() { + if (this.width > 2) { + this.op("lsr", "x2", "x2", "#6"); + this.op("adr", "x5", this.makeLabel("word_shift")); + this.op("ldr", "x5", "[x5, x2, lsl 3]"); + this.op("br", "x5"); + + } else { + this.op("tbnz", "x2", "6", this.makeLabel("word_shift_1")); + } + this.op_empty(); + } + + genJumpTable() { + this.op_label(this.makeLabel("word_shift")); + + for (let i = 0; i < this.width; i++) { + this.op(".quad", this.makeWordLabel(i)); + } + } + + genShift() { + let i = 0; + for (; i < this.width-1; i++) { + this.op_label(this.makeWordLabel(i)); + + this.genBitShift(i); + 
this.genStore(i); + + if (this.hasSavedRegs()) + this.op("b", this.makeLabel("done")); + else + this.op("ret"); + this.op_empty(); + } + + this.op_label(this.makeWordLabel(i)); + + this.genBitShift(i); + this.genStore(i); + + if (!this.hasSavedRegs()) + this.op("ret"); + + this.op_empty(); + } + + genBitShift(wordShift) { + assert(wordShift < this.width); + + let i = wordShift; + for (; i < this.width - 1; i++) { + let c0 = this.getNextWorkReg(); + + this.op("lsr", this.r1[i], this.r1[i], "x3"); + this.op("lsl", c0, this.r1[i+1], "x4"); + this.op("orr", this.r1[i], this.r1[i], c0, "lsl #1"); + this.op_empty(); + } + + this.op("lsr", this.r1[i], this.r1[i], "x3"); + this.op_empty(); + } + + genStore(wordShift) { + assert(wordShift < this.width); + + let a = new RegVar(); + for (let i = 0; i < this.width; i++) { + const j = wordShift + i; + a.push(j < this.width ? this.r1[j] : new Reg(31)); + } + + this.genStoreVar(a, 0); + } + + makeWordLabel(i) { + return this.makeLabel("word_shift") + "_" + i; + } +} + +class Gen_rawShl extends GenBase { + constructor(width, space, name) { + super(width, space, name, 7, 2); + + assert(width <= 12); + + this.r1 = this.assignRegs(this.width); + this.releaseRegs(this.workRegs); + this.releaseRegs(new RegVar(2, 3, 4, 5)); + this.r2 = this.assignRegs(this.width); + this.generate(); + } + + generate() { + if (this.width === 1) { + this.op("ldr", "x3", "[x1]"); + this.op("lsl", "x3", "x3", "x2"); + this.op("str", "x3", "[x0]"); + this.op("ret"); + return; + } + + this.pushRegs(); + this.genLoadVar(this.r1, 1); + this.op_empty(); + + this.genCalcShift(); + this.genCalcJump(); + + this.genShift(); + + this.op_label(this.makeLabel("sub")); + this.genLboMask(this.r1[this.width-1], new Reg(6)); + + this.op("adr", "x1", this.getVarName("rawq")); + this.genSubLoad(this.r2, this.r1, this.r2, 1); + this.genCselStore(this.r1, this.r2, this.r1, 0); + + this.popRegs(); + this.op("ret"); + + if (this.width > 2) this.genJumpTable(); + this.op_empty(); 
+ } + + + genCalcShift() { + this.op("and", "x3", "x2", "0x3f"); + this.op("mov", "x4", "0x3f"); + this.op("sub", "x4", "x4", "x3"); + this.op_empty(); + } + + genCalcJump() { + if (this.width > 2) { + this.op("lsr", "x2", "x2", "#6"); + this.op("adr", "x5", this.makeLabel("word_shift")); + this.op("ldr", "x5", "[x5, x2, lsl 3]"); + this.op("br", "x5"); + + } else { + this.op("tbnz", "x2", "6", this.makeLabel("word_shift_1")); + } + this.op_empty(); + } + + genJumpTable() { + this.op_label(this.makeLabel("word_shift")); + + for (let i = 0; i < this.width; i++) { + this.op(".quad", this.makeWordLabel(i)); + } + } + + genShift() { + let i = 0; + for (; i < this.width; i++) { + this.op_label(this.makeWordLabel(i)); + this.genBitShift(i); + + if (i !== this.width -1) + { + this.op("b", this.makeLabel("sub")); + this.op_empty(); + } + } + } + + genBitShift(wordShift) { + assert(wordShift < this.width); + + let i = this.width - 1 - wordShift; + let j = this.width - 1; + for (; i > 0; i--, j--) { + let c0 = this.getNextWorkReg(); + + this.op("lsl", this.r1[j], this.r1[i], "x3"); + this.op("lsr", c0, this.r1[i-1], "x4"); + this.op("orr", this.r1[j], this.r1[j], c0, "lsr #1"); + this.op_empty(); + } + + this.op("lsl", this.r1[j--], this.r1[0], "x3"); + + for (; j >= 0; j--) { + this.op("mov", this.r1[j], "xzr"); + } + this.op_empty(); + } + + makeWordLabel(i) { + return this.makeLabel("word_shift") + "_" + i; + } +} + +class Gen_rawMul extends GenBase { + constructor(width, space, name, canOptimizeConsensys) { + super(width, space, name, 9, 0); + + assert(width <= 12); + + this.isShort = (width <= 6); + this.canOptimizeConsensys = canOptimizeConsensys; + + if (this.isShort) { + this.r1 = this.assignRegs(this.width+1); + this.b1 = this.assignRegs(this.width); + this.q1 = this.assignRegs(this.width); + this.r2 = this.r1.slice(1, this.width+1); + this.q2 = this.q1; + this.b2 = this.b1; + } else { + this.r1 = this.assignRegs(this.width+1); + this.workRegs = this.assignRegs(4); 
+ this.b1 = cyclicCopy(this.workRegs, this.width); + this.q1 = cyclicCopy(this.workRegs, this.width); + + this.releaseRegs(this.workRegs); + this.releaseRegs(new RegVar(3,4,5,7)); + + this.r2 = this.r1.slice(1, this.width+1); + this.releaseRegs(removeSlice(this.r1, 1, this.width)); + this.q2 = cyclicCopy(new RegVar(1,2), this.width); + this.b2 = this.assignRegs(this.width); + } + this.generate(); + } + + generate() { + this.pushRegs(); + + if (this.isShort) { + this.genLoadVar(this.b1, 2); + this.op_empty(); + } + + this.op("adr", "x4", this.getVarName("np")); + this.op("ldr", "x4", this.getMem(4, 0)); + this.op_empty(); + + this.op("adr", "x6", this.getVarName("rawq")); + if (this.isShort) { + this.genLoadVar(this.q1, 6); + } + this.op_empty(); + + this.genMul(); + + this.op_comment("result ge ", this.getVarName("rawq")); + if (this.isShort) + this.genSub(this.b2, this.r2, this.q2); + else + this.genSubLoad(this.b2, this.r2, this.q2, 6); + this.op_empty(); + + if (!this.canOptimizeConsensys) { + this.op("cinc", "x8", "x8", "hs"); + this.op("cmp", "x8", "1"); + this.op_empty(); + } + + this.genCselStore(this.r2, this.b2, this.r2, 0); + + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + genMul() { + for (let i = 0; i < this.width; i++) { + if (i === 0) { + this.op_comment("product", i, " = pRawB * pRawA[", i, "]"); + } else { + this.op_comment("product", i, " = product", i-1, " + pRawB * pRawA[", i, "]"); + } + + this.op("ldr", "x3", this.getMemWord(1, i)); + + if (i === 0) { + this.genMulAB(i); + } else { + this.genAddMulAB(i); + } + + this.op_comment("np0 = Fq_np * product", i, "[0]"); + this.op("mul", "x5", "x4", this.r1[0]); + this.op_empty(); + + this.op_comment("product", i, " = product", i, " + Fq_rawq * np0"); + this.genMulRawq(i); + } + } + + genMulAB(w) { + let i = 0; + for(; i < this.width; i++) { + let ra = new Reg(3); + let rs = new Reg(7); + + if (!this.isShort) this.genLoadVarWord(this.b1, i, 2); + + this.op("mul", (i ? 
rs : this.r1[i]), this.b1[i], ra); + if (i) + this.genAddReg(this.r1[i], this.r1[i], rs, i-1); + + this.op("umulh", this.r1[i+1], this.b1[i], ra); + + } + if (i > 1) + this.op("adc", this.r1[i], this.r1[i], "xzr"); + this.op_empty(); + } + + genAddMulAB(w) { + let ra = new Reg(3); + + let i = 0; + for(; i < this.width; i++) { + if (!this.isShort) this.genLoadVarWord(this.b1, i, 2); + this.op("mul", this.r1[i], this.b1[i], ra); + this.genAddReg(this.r1[i], this.r1[i], this.r1[i+1], i); + } + + if (!this.canOptimizeConsensys) { + this.op("adcs", this.r1[i], "xzr", "x8"); + this.op("adc", "x8", "xzr", "xzr"); + this.op_empty(); + } else { + this.op("adc", this.r1[i], "xzr", "xzr"); + this.op_empty(); + this.op("adds", this.r1[1], this.r1[1], "x5"); + } + + let t1 = cyclicCopy(new RegVar(7, 5), this.width); + + for(i = 0; i < this.width; i++) { + let iAdd = (i === this.width - 1) ? "adc" : "adcs"; + + if (!this.canOptimizeConsensys) { + iAdd = (i) ? "adcs" : "adds"; + } + + if (!this.isShort) this.genLoadVarWord(this.b1, i, 2); + this.op("umulh", t1[i], this.b1[i], ra); + this.op(iAdd, this.r1[i+1], this.r1[i+1], t1[i]); + } + + if (!this.canOptimizeConsensys) { + this.op("adc", "x8", "x8", "xzr"); + } + this.op_empty(); + } + + genMulRawq(w) { + let t1 = cyclicCopy(new RegVar(7, 3), this.width); + let np0 = new Reg(5); + + let i = 0; + for(; i < this.width; i++) { + if (!this.isShort) this.genLoadVarWord(this.q1, i, 6); + this.op("mul", t1[i], this.q1[i], np0); + this.genAddReg(this.r1[i], this.r1[i], t1[i], i); + } + if (!this.canOptimizeConsensys) { + this.op("adcs", this.r1[i], this.r1[i], "xzr"); + if (w) { + this.op("adc", "x8", "x8", "xzr"); + } else { + this.op("adc", "x8", "xzr", "xzr"); + } + + } else { + this.op("adc", this.r1[i], this.r1[i], "xzr"); + } + this.op_empty(); + + for(i = 0; i < this.width; i++) { + if (!this.isShort) this.genLoadVarWord(this.q1, i, 6); + this.op("umulh", t1[i], this.q1[i], np0); + this.genAddReg(this.r1[i+1], this.r1[i+1], 
t1[i], i); + } + + if (!this.canOptimizeConsensys) { + this.op("adc", "x8", "x8", "xzr"); + } else + { + if (w < this.width - 1) { + this.op("adc", np0, "xzr", "xzr"); + } + } + this.op_empty(); + } +} + +class Gen_rawMul1 extends GenBase { + constructor(width, space, name, mulAB, canOptimizeConsensys) { + super(width, space, name, 9, 0); + + assert(width <= 12); + + this.isShort = (width <= 6); + this.mulAB = mulAB; + this.canOptimizeConsensys = canOptimizeConsensys; + + if (this.isShort) { + this.r1 = this.assignRegs(this.width+1); + this.b1 = this.assignRegs(this.width); + this.q1 = this.assignRegs(this.width); + this.r2 = this.r1.slice(1, this.width+1); + this.q2 = this.q1; + this.b2 = this.b1; + } else { + this.r1 = this.assignRegs(this.width+1); + this.workRegs = this.assignRegs(4); + this.b1 = cyclicCopy(this.workRegs, this.width); + this.q1 = cyclicCopy(this.workRegs, this.width); + + this.releaseRegs(this.workRegs); + this.releaseRegs(new RegVar(3,4,5,7)); + + this.r2 = this.r1.slice(1, this.width+1); + this.releaseRegs(removeSlice(this.r1, 1, this.width)); + + this.q2 = cyclicCopy(new RegVar(1,2), this.width); + this.b2 = this.assignRegs(this.width); + } + this.generate(); + } + + generate() { + this.pushRegs(); + + if (this.mulAB) { + if (this.isShort) { + this.genLoadVar(this.b1, 1); + } + } else { + this.genLoadVar(this.r1, 1); + + let lastR = this.r1[this.width]; + this.op("mov", lastR, "xzr"); + } + this.op_empty(); + + + this.op("adr", "x4", this.getVarName("np")); + this.op("ldr", "x4", this.getMem(4, 0)); + this.op_empty(); + + this.op("adr", "x6", this.getVarName("rawq")); + if (this.isShort) { + this.genLoadVar(this.q1, 6); + } + this.op_empty(); + + if (this.mulAB) { + this.op_comment("product0 = pRawB * pRawA"); + this.genMulAB(0); + } + + for (let i = 0; i < this.width; i++) { + this.op_comment("np0 = Fq_np * product", i, "[0]"); + this.op("mul", "x5", "x4", this.r1[i ? 
1 : 0]); + + this.op_comment("product", i, " = product", i, " + Fq_rawq * np0"); + this.genMulRawq(i); + } + + this.op_comment("result ge ", this.getVarName("rawq")); + if (this.isShort) + this.genSub(this.b2, this.r2, this.q2); + else + this.genSubLoad(this.b2, this.r2, this.q2, 6); + this.op_empty(); + + if (!this.canOptimizeConsensys) { + this.op("cinc", "x8", "x8", "hs"); + this.op("cmp", "x8", "1"); + this.op_empty(); + } + + this.genCselStore(this.r2, this.b2, this.r2, 0); + + this.popRegs(); + this.op("ret"); + this.op_empty(); + } + + genMulAB(w) { + let i = 0; + for(; i < this.width; i++) { + let ra = new Reg(2); + let rs = new Reg(7); + + if (!this.isShort) this.genLoadVarWord(this.b1, i, 1); + + this.op("mul", (i ? rs : this.r1[i]), this.b1[i], ra); + if (i) + this.genAddReg(this.r1[i], this.r1[i], rs, i-1); + + this.op("umulh", this.r1[i+1], this.b1[i], ra); + + } + if (i > 1) + this.op("adc", this.r1[i], this.r1[i], "xzr"); + this.op_empty(); + } + + genMulRawq(w) { + let t1 = cyclicCopy(new RegVar(7, 3), this.width); + let np0 = new Reg(5); + let carry = new Reg(8); + + let i = 0; + for(; i < this.width; i++) { + if (!this.isShort) this.genLoadVarWord(this.q1, i, 6); + this.op("mul", t1[i], this.q1[i], np0); + if (w > 0) + this.genAddReg(this.r1[i], this.r1[i+1], t1[i], i); + else + this.genAddReg(this.r1[i], this.r1[i], t1[i], i); + } + if (w > 0) { + if (!this.canOptimizeConsensys) { + this.op("adcs", this.r1[i], "xzr", carry); + this.op("adc", carry, "xzr", "xzr"); + this.op_empty(); + } else { + this.op("adc", this.r1[i], "xzr", "xzr"); + this.op_empty(); + this.op("adds", this.r1[1], this.r1[1], carry); + } + } else { + this.op("adc", this.r1[i], this.r1[i], "xzr"); + this.op_empty(); + } + + for(i = 0; i < this.width; i++) { + if (!this.isShort) this.genLoadVarWord(this.q1, i, 6); + this.op("umulh", t1[i], this.q1[i], np0); + this.genAddReg(this.r1[i+1], this.r1[i+1], t1[i], i); + } + + if (!this.canOptimizeConsensys) { + if (w) { + 
this.op("adc", carry, carry, "xzr"); + } else { + this.op("adc", carry, "xzr", "xzr"); + } + } else { + if (w < this.width - 1) { + this.op("adc", carry, "xzr", "xzr"); + } + } + this.op_empty(); + } +} + /* generate: builds one generator object per raw routine for a field of `width` 64-bit words, with `space` used as the symbol prefix, and returns their string renderings joined by newlines (plus a trailing newline). canOptimizeConsensys is forwarded only to the Montgomery multiplication generators (Gen_rawMul and Gen_rawMul1). */ +function generate(width, space, canOptimizeConsensys) { + let generators = [ + new Gen_rawAdd(width, space, "rawAdd", false), + new Gen_rawAdd(width, space, "rawAddLS", true), + new Gen_rawSub(width, space, "rawSub", ""), + new Gen_rawSub(width, space, "rawSubSL", "SL"), + new Gen_rawSub(width, space, "rawSubLS", "LS"), + new Gen_rawSubRegular(width, space, "rawSubRegular"), + new Gen_rawNeg(width, space, "rawNeg"), + new Gen_rawNegLS(width, space, "rawNegLS"), + new Gen_rawMul(width, space, "rawMMul", canOptimizeConsensys), /* Gen_rawMul1 is instantiated twice: mulAB = true emits rawMMul1, mulAB = false emits rawFromMontgomery. */ + new Gen_rawMul1(width, space, "rawMMul1", true, canOptimizeConsensys), + new Gen_rawMul1(width, space, "rawFromMontgomery", false, canOptimizeConsensys), + new Gen_rawIsZero(width, space), + new Gen_rawIsEq(width, space), + new Gen_rawCmp(width, space), + new Gen_rawCopy(width, space), + new Gen_rawCopyS2L(width, space), + new Gen_rawSwap(width, space), + new Gen_rawBinOp(width, space, "rawAnd", "and"), + new Gen_rawBinOp(width, space, "rawOr", "orr"), + new Gen_rawBinOp(width, space, "rawXor", "eor"), + new Gen_rawBinOp(width, space, "rawNot", "mvn"), + new Gen_rawShr(width, space, "rawShr"), + new Gen_rawShl(width, space, "rawShl") + ]; /* Array.prototype.join stringifies each generator through its toString(). */ + return generators.join("\n") + "\n"; +} + /* genFuncs: exported entry point. n64 is the number of 64-bit words needed for modulus q, i.e. ceil(bitLength / 64). canOptimizeConsensys is true when the most significant 64-bit word of q is at most 2^63 - 2, since ((2^64 - 1) >> 1) - 1 = 2^63 - 2. */ +function genFuncs(space, q) { + const n64 = Math.floor((q.bitLength() - 1) / 64)+1; + const canOptimizeConsensys = q.shiftRight((n64-1)*64).leq( bigInt.one.shiftLeft(64).minus(1).shiftRight(1).minus(1) ); + + return generate(n64, space, canOptimizeConsensys); +} + diff --git a/src/binops.asm.ejs b/src/binops.asm.ejs index a142b34..68d2b8d 100644 --- a/src/binops.asm.ejs +++ b/src/binops.asm.ejs @@ -295,6 +295,7 @@ bnot_l1n: ; Modified Registers: ; r8, r9, 10, r11, rax, rcx ;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_rawShr: rawShr: cmp rdx, 0 je <%=name%>_rawCopy @@ 
-378,6 +379,7 @@ rawShr_endif3_<%=i%>: ; Modified Registers: ; r8, r9, 10, r11, rax, rcx ;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_rawShl: rawShl: cmp rdx, 0 je <%=name%>_rawCopy diff --git a/src/buildzqfield.js b/src/buildzqfield.js index 7886cfa..6fbc2e0 100755 --- a/src/buildzqfield.js +++ b/src/buildzqfield.js @@ -8,12 +8,14 @@ const renderFile = util.promisify(require("ejs").renderFile); const runningAsScript = !module.parent; const montgomeryBuilder = require("./montgomerybuilder"); +const armBuilder = require("./armbuilder"); class ZqBuilder { constructor(q, name) { const self = this; this.q=bigInt(q); this.n64 = Math.floor((this.q.bitLength() - 1) / 64)+1; + this.canOptimizeConsensys = this.q.shiftRight((this.n64-1)*64).leq( bigInt.one.shiftLeft(64).minus(1).shiftRight(1).minus(1) ); this.name = name; this.bigInt = bigInt; this.lastTmp=0; @@ -24,6 +26,7 @@ class ZqBuilder { return label+"_"+self.lastTmp; }; this.montgomeryBuilder = montgomeryBuilder; + this.armBuilder = armBuilder; } constantElement(v) { @@ -46,8 +49,12 @@ async function buildField(q, name) { let asm = await renderFile(path.join(__dirname, "fr.asm.ejs"), builder); const cpp = await renderFile(path.join(__dirname, "fr.cpp.ejs"), builder); const hpp = await renderFile(path.join(__dirname, "fr.hpp.ejs"), builder); + const element_hpp = await renderFile(path.join(__dirname, "fr_element.hpp.ejs"), builder); + const generic_cpp = await renderFile(path.join(__dirname, "fr_generic.cpp.ejs"), builder); + const raw_generic_cpp = await renderFile(path.join(__dirname, "fr_raw_generic.cpp.ejs"), builder); + const raw_arm64_s = await renderFile(path.join(__dirname, "fr_raw_arm64.s.ejs"), builder); - return {asm: asm, hpp: hpp, cpp: cpp}; + return {asm: asm, hpp: hpp, cpp: cpp, element_hpp: element_hpp, generic_cpp: generic_cpp, raw_generic_cpp: raw_generic_cpp, raw_arm64_s: raw_arm64_s}; } if (runningAsScript) { @@ -64,12 +71,20 @@ if (runningAsScript) { const asmFileName = (argv.oa) ? 
argv.oa : argv.name.toLowerCase() + ".asm"; const hFileName = (argv.oh) ? argv.oh : argv.name.toLowerCase() + ".hpp"; const cFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + ".cpp"; + const hElementFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + "_element.hpp"; + const cGenericFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + "_generic.cpp"; + const cRawGenericFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + "_raw_generic.cpp"; + const sRawArm64FileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + "_raw_arm64.s"; buildField(q, argv.name).then( (res) => { fs.writeFileSync(asmFileName, res.asm, "utf8"); fs.writeFileSync(hFileName, res.hpp, "utf8"); fs.writeFileSync(cFileName, res.cpp, "utf8"); + fs.writeFileSync(hElementFileName, res.element_hpp, "utf8"); + fs.writeFileSync(cGenericFileName, res.generic_cpp, "utf8"); + fs.writeFileSync(cRawGenericFileName, res.raw_generic_cpp, "utf8"); + fs.writeFileSync(sRawArm64FileName, res.raw_arm64_s, "utf8"); }); } else { diff --git a/src/fr.asm.ejs b/src/fr.asm.ejs index 41311cd..03daca0 100644 --- a/src/fr.asm.ejs +++ b/src/fr.asm.ejs @@ -37,11 +37,14 @@ global <%=name%>_rawSub global <%=name%>_rawNeg global <%=name%>_rawMMul + global <%=name%>_rawMMul1 global <%=name%>_rawMSquare global <%=name%>_rawToMontgomery global <%=name%>_rawFromMontgomery global <%=name%>_rawIsEq global <%=name%>_rawIsZero + global <%=name%>_rawShr + global <%=name%>_rawShl global <%=name%>_rawq global <%=name%>_rawR3 diff --git a/src/fr.cpp.ejs b/src/fr.cpp.ejs index 50ac0e7..9d3f1e2 100644 --- a/src/fr.cpp.ejs +++ b/src/fr.cpp.ejs @@ -166,6 +166,9 @@ void <%=name%>_fail() { assert(false); } +void <%=name%>_longErr() { + <%=name%>_fail(); +} Raw<%=name%>::Raw<%=name%>() { <%=name%>_init(); @@ -212,7 +215,7 @@ void Raw<%=name%>::set(Element &r, int value) { } mpz_export((void *)(r.v), NULL, -1, 8, -1, 0, mr); - + for (int i=0; i<<%=name%>_N64; i++) r.v[i] = 0; mpz_export((void *)(r.v), NULL, -1, 8, -1, 0, 
mr); <%=name%>_rawToMontgomery(r.v,r.v); @@ -242,7 +245,7 @@ void Raw<%=name%>::inv(Element &r, const Element &a) { for (int i=0; i<<%=name%>_N64; i++) r.v[i] = 0; mpz_export((void *)(r.v), NULL, -1, 8, -1, 0, mr); - <%=name%>_rawMMul(r.v, r.v,<%=name%>_rawR3); + <%=name%>_rawMMul(r.v, r.v,<%=name%>_R3.longVal); mpz_clear(mr); } @@ -294,11 +297,11 @@ int Raw<%=name%>::toRprBE(const Element &element, uint8_t *data, int bytes) mpz_t r; mpz_init(r); - + toMpz(r, element); - + mpz_export(data, NULL, 1, bytes, 1, 0, r); - + return <%=name%>_N64 * 8; } diff --git a/src/fr.hpp.ejs b/src/fr.hpp.ejs index 7444e88..1bbe388 100644 --- a/src/fr.hpp.ejs +++ b/src/fr.hpp.ejs @@ -1,25 +1,18 @@ #ifndef __<%=name.toUpperCase()%>_H #define __<%=name.toUpperCase()%>_H -#include +#include "<%=name.toLowerCase()%>_element.hpp" +#include #include #include -#define <%=name%>_N64 <%= n64 %> -#define <%=name%>_SHORT 0x00000000 -#define <%=name%>_LONG 0x80000000 -#define <%=name%>_LONGMONTGOMERY 0xC0000000 -typedef uint64_t <%=name%>RawElement[<%=name%>_N64]; -typedef struct __attribute__((__packed__)) { - int32_t shortVal; - uint32_t type; - <%=name%>RawElement longVal; -} <%=name%>Element; -typedef <%=name%>Element *P<%=name%>Element; extern <%=name%>Element <%=name%>_q; +extern <%=name%>Element <%=name%>_R2; extern <%=name%>Element <%=name%>_R3; -extern <%=name%>RawElement <%=name%>_rawq; -extern <%=name%>RawElement <%=name%>_rawR3; + +#ifdef USE_ASM + +#if defined(ARCH_X86_64) extern "C" void <%=name%>_copy(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_copyn(P<%=name%>Element r, P<%=name%>Element a, int n); @@ -58,13 +51,141 @@ extern "C" void <%=name%>_rawNeg(<%=name%>RawElement pRawResult, const <%=name%> extern "C" void <%=name%>_rawMMul(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); extern "C" void <%=name%>_rawMSquare(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); extern "C" void 
<%=name%>_rawMMul1(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, uint64_t pRawB); -extern "C" void <%=name%>_rawToMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement &pRawA); -extern "C" void <%=name%>_rawFromMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement &pRawA); -extern "C" int <%=name%>_rawIsEq(const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawToMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawFromMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" int <%=name%>_rawIsEq(<%=name%>RawElement const pRawA, const <%=name%>RawElement pRawB); extern "C" int <%=name%>_rawIsZero(const <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawShl(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); +extern "C" void <%=name%>_rawShr(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); extern "C" void <%=name%>_fail(); +#elif defined(ARCH_ARM64) + + void <%=name%>_copy(P<%=name%>Element r, P<%=name%>Element a); + void <%=name%>_mul(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_toNormal(P<%=name%>Element r, P<%=name%>Element a); + + void <%=name%>_toLongNormal(P<%=name%>Element r, P<%=name%>Element a); + int <%=name%>_isTrue(P<%=name%>Element pE); + void <%=name%>_copyn(P<%=name%>Element r, P<%=name%>Element a, int n); + void <%=name%>_lt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + int <%=name%>_toInt(P<%=name%>Element pE); + void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_band(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_bor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_bxor(P<%=name%>Element r, 
P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_bnot(P<%=name%>Element r, P<%=name%>Element a); + void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_eq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_neq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_gt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_leq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_geq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_lor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_lnot(P<%=name%>Element r, P<%=name%>Element a); + void <%=name%>_land(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a); + void <%=name%>_toMontgomery(P<%=name%>Element r, P<%=name%>Element a); + void <%=name%>_square(P<%=name%>Element r, P<%=name%>Element a); + +extern "C" void <%=name%>_rawCopy(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawSwap(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawAdd(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawSub(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawNeg(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawMMul(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); + void <%=name%>_rawMSquare(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawMMul1(<%=name%>RawElement 
pRawResult, const <%=name%>RawElement pRawA, uint64_t pRawB); + void <%=name%>_rawToMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawFromMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +extern "C" int <%=name%>_rawIsEq(const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +extern "C" int <%=name%>_rawIsZero(const <%=name%>RawElement pRawB); + void <%=name%>_rawZero(<%=name%>RawElement pRawResult); +extern "C" void <%=name%>_rawCopyS2L(<%=name%>RawElement pRawResult, int64_t val); +extern "C" void <%=name%>_rawAddLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +extern "C" void <%=name%>_rawSubSL(<%=name%>RawElement pRawResult, uint64_t rawA, <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawSubLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +extern "C" void <%=name%>_rawNegLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +extern "C" int <%=name%>_rawCmp(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawAnd(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawOr(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawXor(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +extern "C" void <%=name%>_rawShl(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); +extern "C" void <%=name%>_rawShr(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); +extern "C" void <%=name%>_rawNot(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA); +extern "C" void <%=name%>_rawSubRegular(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); + + void <%=name%>_fail(); + void <%=name%>_longErr(); + +#endif + +#else + + +void 
<%=name%>_copy(P<%=name%>Element r, P<%=name%>Element a); +void <%=name%>_mul(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_toNormal(P<%=name%>Element r, P<%=name%>Element a); + +void <%=name%>_toLongNormal(P<%=name%>Element r, P<%=name%>Element a); +int <%=name%>_isTrue(P<%=name%>Element pE); +void <%=name%>_copyn(P<%=name%>Element r, P<%=name%>Element a, int n); +void <%=name%>_lt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +int <%=name%>_toInt(P<%=name%>Element pE); +void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_band(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_bor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_bxor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_bnot(P<%=name%>Element r, P<%=name%>Element a); +void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_eq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_neq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_gt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_leq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_geq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_lor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_lnot(P<%=name%>Element r, P<%=name%>Element a); +void <%=name%>_land(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a); +void <%=name%>_toMontgomery(P<%=name%>Element r, P<%=name%>Element a); +void 
<%=name%>_square(P<%=name%>Element r, P<%=name%>Element a); + +void <%=name%>_rawCopy(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +void <%=name%>_rawSwap(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA); +void <%=name%>_rawAdd(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +void <%=name%>_rawSub(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +void <%=name%>_rawNeg(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +void <%=name%>_rawMMul(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +void <%=name%>_rawMSquare(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +void <%=name%>_rawMMul1(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, uint64_t pRawB); +void <%=name%>_rawToMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +void <%=name%>_rawFromMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA); +int <%=name%>_rawIsEq(const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB); +int <%=name%>_rawIsZero(const <%=name%>RawElement pRawB); +void <%=name%>_rawZero(<%=name%>RawElement pRawResult); +void <%=name%>_rawCopyS2L(<%=name%>RawElement pRawResult, int64_t val); +void <%=name%>_rawAddLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +void <%=name%>_rawSubSL(<%=name%>RawElement pRawResult, uint64_t rawA, <%=name%>RawElement pRawB); +void <%=name%>_rawSubLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +void <%=name%>_rawNegLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB); +int <%=name%>_rawCmp(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +void <%=name%>_rawAnd(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +void <%=name%>_rawOr(<%=name%>RawElement 
pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +void <%=name%>_rawXor(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); +void <%=name%>_rawShl(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); +void <%=name%>_rawShr(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b); +void <%=name%>_rawNot(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA); +void <%=name%>_rawSubRegular(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB); + +void <%=name%>_fail(); +void <%=name%>_longErr(); + +#endif // Pending functions to convert @@ -97,64 +218,61 @@ public: Raw<%=name%>(); ~Raw<%=name%>(); - const Element &zero() { return fZero; }; - const Element &one() { return fOne; }; - const Element &negOne() { return fNegOne; }; + const Element &zero() { return fZero; } + const Element &one() { return fOne; } + const Element &negOne() { return fNegOne; } Element set(int value); void set(Element &r, int value); void fromString(Element &r, const std::string &n, uint32_t radix = 10); std::string toString(const Element &a, uint32_t radix = 10); - void inline copy(Element &r, const Element &a) { <%=name%>_rawCopy(r.v, a.v); }; - void inline swap(Element &a, Element &b) { <%=name%>_rawSwap(a.v, b.v); }; - void inline add(Element &r, const Element &a, const Element &b) { <%=name%>_rawAdd(r.v, a.v, b.v); }; - void inline sub(Element &r, const Element &a, const Element &b) { <%=name%>_rawSub(r.v, a.v, b.v); }; - void inline mul(Element &r, const Element &a, const Element &b) { <%=name%>_rawMMul(r.v, a.v, b.v); }; - - Element inline add(const Element &a, const Element &b) { Element r; <%=name%>_rawAdd(r.v, a.v, b.v); return r;}; - Element inline sub(const Element &a, const Element &b) { Element r; <%=name%>_rawSub(r.v, a.v, b.v); return r;}; - Element inline mul(const Element &a, const Element &b) { Element r; <%=name%>_rawMMul(r.v, a.v, b.v); return r;}; - - Element inline neg(const 
Element &a) { Element r; <%=name%>_rawNeg(r.v, a.v); return r; }; - Element inline square(const Element &a) { Element r; <%=name%>_rawMSquare(r.v, a.v); return r; }; - - Element inline add(int a, const Element &b) { return add(set(a), b);}; - Element inline sub(int a, const Element &b) { return sub(set(a), b);}; - Element inline mul(int a, const Element &b) { return mul(set(a), b);}; - - Element inline add(const Element &a, int b) { return add(a, set(b));}; - Element inline sub(const Element &a, int b) { return sub(a, set(b));}; - Element inline mul(const Element &a, int b) { return mul(a, set(b));}; - - void inline mul1(Element &r, const Element &a, uint64_t b) { <%=name%>_rawMMul1(r.v, a.v, b); }; - void inline neg(Element &r, const Element &a) { <%=name%>_rawNeg(r.v, a.v); }; - void inline square(Element &r, const Element &a) { <%=name%>_rawMSquare(r.v, a.v); }; + void inline copy(Element &r, const Element &a) { <%=name%>_rawCopy(r.v, a.v); } + void inline swap(Element &a, Element &b) { <%=name%>_rawSwap(a.v, b.v); } + void inline add(Element &r, const Element &a, const Element &b) { <%=name%>_rawAdd(r.v, a.v, b.v); } + void inline sub(Element &r, const Element &a, const Element &b) { <%=name%>_rawSub(r.v, a.v, b.v); } + void inline mul(Element &r, const Element &a, const Element &b) { <%=name%>_rawMMul(r.v, a.v, b.v); } + + Element inline add(const Element &a, const Element &b) { Element r; <%=name%>_rawAdd(r.v, a.v, b.v); return r;} + Element inline sub(const Element &a, const Element &b) { Element r; <%=name%>_rawSub(r.v, a.v, b.v); return r;} + Element inline mul(const Element &a, const Element &b) { Element r; <%=name%>_rawMMul(r.v, a.v, b.v); return r;} + + Element inline neg(const Element &a) { Element r; <%=name%>_rawNeg(r.v, a.v); return r; } + Element inline square(const Element &a) { Element r; <%=name%>_rawMSquare(r.v, a.v); return r; } + + Element inline add(int a, const Element &b) { return add(set(a), b);} + Element inline sub(int a, const Element 
&b) { return sub(set(a), b);} + Element inline mul(int a, const Element &b) { return mul(set(a), b);} + + Element inline add(const Element &a, int b) { return add(a, set(b));} + Element inline sub(const Element &a, int b) { return sub(a, set(b));} + Element inline mul(const Element &a, int b) { return mul(a, set(b));} + + void inline mul1(Element &r, const Element &a, uint64_t b) { <%=name%>_rawMMul1(r.v, a.v, b); } + void inline neg(Element &r, const Element &a) { <%=name%>_rawNeg(r.v, a.v); } + void inline square(Element &r, const Element &a) { <%=name%>_rawMSquare(r.v, a.v); } void inv(Element &r, const Element &a); void div(Element &r, const Element &a, const Element &b); void exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize); - void inline toMontgomery(Element &r, const Element &a) { <%=name%>_rawToMontgomery(r.v, a.v); }; - void inline fromMontgomery(Element &r, const Element &a) { <%=name%>_rawFromMontgomery(r.v, a.v); }; - int inline eq(const Element &a, const Element &b) { return <%=name%>_rawIsEq(a.v, b.v); }; - int inline isZero(const Element &a) { return <%=name%>_rawIsZero(a.v); }; + void inline toMontgomery(Element &r, const Element &a) { <%=name%>_rawToMontgomery(r.v, a.v); } + void inline fromMontgomery(Element &r, const Element &a) { <%=name%>_rawFromMontgomery(r.v, a.v); } + int inline eq(const Element &a, const Element &b) { return <%=name%>_rawIsEq(a.v, b.v); } + int inline isZero(const Element &a) { return <%=name%>_rawIsZero(a.v); } void toMpz(mpz_t r, const Element &a); void fromMpz(Element &a, const mpz_t r); int toRprBE(const Element &element, uint8_t *data, int bytes); int fromRprBE(Element &element, const uint8_t *data, int bytes); - - int bytes ( void ) { return <%=name%>_N64 * 8; }; - + + int bytes ( void ) { return <%=name%>_N64 * 8; } + + void fromUI(Element &r, unsigned long int v); static Raw<%=name%> field; }; - #endif // __<%=name.toUpperCase()%>_H - - - diff --git a/src/fr_element.hpp.ejs 
b/src/fr_element.hpp.ejs new file mode 100644 index 0000000..3856705 --- /dev/null +++ b/src/fr_element.hpp.ejs @@ -0,0 +1,23 @@ +#ifndef <%=name.toUpperCase()%>_ELEMENT_HPP +#define <%=name.toUpperCase()%>_ELEMENT_HPP + +#include + /* N64 is the number of 64-bit words in a raw element. The remaining macros are the values stored in the type field: 0x80000000 marks a long value, 0x40000000 marks Montgomery form, and LONGMONTGOMERY is both bits together. MONTGOMERY and SHORTMONTGOMERY are the same value. */ +#define <%=name%>_N64 <%= n64 %> +#define <%=name%>_SHORT 0x00000000 +#define <%=name%>_MONTGOMERY 0x40000000 +#define <%=name%>_SHORTMONTGOMERY 0x40000000 +#define <%=name%>_LONG 0x80000000 +#define <%=name%>_LONGMONTGOMERY 0xC0000000 + /* Raw element: a fixed array of N64 unsigned 64-bit words. */ +typedef uint64_t <%=name%>RawElement[<%=name%>_N64]; + /* Tagged element, packed: shortVal is the 32-bit value read for short elements, type holds the tag bits defined above, longVal holds the multi-word value. */ +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + uint32_t type; + <%=name%>RawElement longVal; +} <%=name%>Element; + +typedef <%=name%>Element *P<%=name%>Element; + +#endif // <%=name.toUpperCase()%>_ELEMENT_HPP diff --git a/src/fr_generic.cpp.ejs b/src/fr_generic.cpp.ejs new file mode 100644 index 0000000..2cd5a6d --- /dev/null +++ b/src/fr_generic.cpp.ejs @@ -0,0 +1,2390 @@ +#include "<%=name.toLowerCase()+".hpp"%>" +#include +#include +#include + /* Constants tagged 0x80000000 (long, not Montgomery): the modulus q, R2 = 2^(2*64*n64) mod q and R3 = 2^(3*64*n64) mod q. */ +<%=name%>Element <%=name%>_q = {0, 0x80000000, {<%= constantElement(q) %>}}; +<%=name%>Element <%=name%>_R2 = {0, 0x80000000, {<%= constantElement(bigInt.one.shiftLeft(n64*64*2).mod(q)) %>}}; +<%=name%>Element <%=name%>_R3 = {0, 0x80000000, {<%= constantElement(bigInt.one.shiftLeft(n64*64*3).mod(q)) %>}}; + /* half is q >> 1; zero is the all-zero raw element. */ +static <%=name%>RawElement half = {<%= constantElement(q.shiftRight(1)) %>}; +static <%=name%>RawElement zero = {0}; + + /* Copies the whole element structure from a to r. */ +void <%=name%>_copy(P<%=name%>Element r, const P<%=name%>Element a) +{ + *r = *a; +} + /* Converts a LONGMONTGOMERY element to LONG through rawFromMontgomery (only type and longVal of r are written in that branch); an element of any other type is copied unchanged. */ +void <%=name%>_toNormal(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type == <%=name%>_LONGMONTGOMERY) + { + r->type = <%=name%>_LONG; + <%=name%>_rawFromMontgomery(r->longVal, a->longVal); + } + else + { + <%=name%>_copy(r, a); + } +} + /* Returns 1 when val does not fit in a signed 32-bit integer and 0 when it does: val >> 31 is 0 for in-range non-negative values and -1 for in-range negative ones, and -1 is folded to 0 by the complement. Relies on >> of a negative value being an arithmetic shift. */ +static inline int has_mul32_overflow(int64_t val) +{ + int64_t sign = val >> 31; + + if (sign) + { + sign = ~sign; + } + + return sign ? 
1 : 0; +} + /* Stores the 64-bit product of two 32-bit values in *r and returns the has_mul32_overflow flag for it. */ +static inline int <%=name%>_rawSMul(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a * b; + + return has_mul32_overflow(*r); +} + /* Product of two short elements. NOTE(review): both branches end by writing the product into longVal and tagging r as LONG with shortVal = 0; the SHORT stores in the else branch are overwritten right away, so the result is never left as SHORT. */ +static inline void mul_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int64_t result; + + int overflow = <%=name%>_rawSMul(&result, a->shortVal, b->shortVal); + + if (overflow) + { + <%=name%>_rawCopyS2L(r->longVal, result); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + else + { + // done the same way as in intel asm implementation + r->shortVal = (int32_t)result; + r->type = <%=name%>_SHORT; + // + + <%=name%>_rawCopyS2L(r->longVal, result); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } +} + /* Naming of the helpers below, as used by the dispatch in <%=name%>_mul: l/s = long/short operand, n/m = normal/Montgomery operand. Two normal long operands: rawMMul of a and b, then a second rawMMul by R3; the result is tagged LONGMONTGOMERY. */ +static inline void mul_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); + <%=name%>_rawMMul(r->longVal, r->longVal, <%=name%>_R3.longVal); +} + /* Mixed normal/Montgomery long operands: a single rawMMul, result tagged LONG. */ +static inline void mul_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + /* Both long operands in Montgomery form: a single rawMMul, result tagged LONGMONTGOMERY. */ +static inline void mul_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + /* Montgomery long times normal long: a single rawMMul, result tagged LONG. */ +static inline void mul_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + /* Normal long times normal short: rawMMul1 by the magnitude of b->shortVal (widened to 64 bits before negation), rawNeg applied when b is negative, then rawMMul by R3; result tagged LONGMONTGOMERY. */ +static inline void mul_l1ns2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + <%=name%>_rawMMul1(r->longVal, a->longVal, -b_shortVal); + <%=name%>_rawNeg(r->longVal, r->longVal); + } + else + { + <%=name%>_rawMMul1(r->longVal, a->longVal, b->shortVal); + } + + <%=name%>_rawMMul(r->longVal, r->longVal, <%=name%>_R3.longVal); +} + +static inline void 
mul_s1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + <%=name%>_rawMMul1(r->longVal, b->longVal, -a_shortVal); + <%=name%>_rawNeg(r->longVal, r->longVal); + } + else + { + <%=name%>_rawMMul1(r->longVal, b->longVal, a->shortVal); + } + + <%=name%>_rawMMul(r->longVal, r->longVal, <%=name%>_R3.longVal); +} + +static inline void mul_l1ms2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + <%=name%>_rawMMul1(r->longVal, a->longVal, -b_shortVal); + <%=name%>_rawNeg(r->longVal, r->longVal); + } + else + { + <%=name%>_rawMMul1(r->longVal, a->longVal, b->shortVal); + } +} + +static inline void mul_s1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + <%=name%>_rawMMul1(r->longVal, b->longVal, -a_shortVal); + <%=name%>_rawNeg(r->longVal, r->longVal); + } + else + { + <%=name%>_rawMMul1(r->longVal, b->longVal, a->shortVal); + } +} + +static inline void mul_l1ns2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_l1ms2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_s1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_s1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawMMul(r->longVal, a->longVal, b->longVal); +} + +void <%=name%>_mul(P<%=name%>Element r, 
P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + mul_l1ml2m(r, a, b); + } + else + { + mul_l1ml2n(r, a, b); + } + } + else + { + if (b->type & <%=name%>_MONTGOMERY) + { + mul_l1nl2m(r, a, b); + } + else + { + mul_l1nl2n(r, a, b); + } + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + mul_l1ms2m(r, a, b); + } + else + { + mul_l1ms2n(r, a, b); + } + } + else + { + if (b->type & <%=name%>_MONTGOMERY) + { + mul_l1ns2m(r, a, b); + } + else + { + mul_l1ns2n(r, a, b); + } + } + } + else if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + mul_s1ml2m(r, a, b); + } + else + { + mul_s1ml2n(r,a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + mul_s1nl2m(r, a, b); + } + else + { + mul_s1nl2n(r, a, b); + } + } + else + { + mul_s1s2(r, a, b); + } +} + +void <%=name%>_toLongNormal(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + <%=name%>_rawFromMontgomery(r->longVal, a->longVal); + r->type = <%=name%>_LONG; + } + else + { + <%=name%>_copy(r, a); + } + } + else + { + <%=name%>_rawCopyS2L(r->longVal, a->shortVal); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } +} + +void <%=name%>_toMontgomery(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type & <%=name%>_MONTGOMERY) + { + <%=name%>_copy(r, a); + } + else if (a->type & <%=name%>_LONG) + { + r->shortVal = a->shortVal; + + <%=name%>_rawMMul(r->longVal, a->longVal, <%=name%>_R2.longVal); + + r->type = <%=name%>_LONGMONTGOMERY; + } + else if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + <%=name%>_rawMMul1(r->longVal, <%=name%>_R2.longVal, -a_shortVal); + <%=name%>_rawNeg(r->longVal, r->longVal); + + r->type = <%=name%>_SHORTMONTGOMERY; + } + else + { + 
<%=name%>_rawMMul1(r->longVal, <%=name%>_R2.longVal, a->shortVal); + + r->type = <%=name%>_SHORTMONTGOMERY; + } +} + +void <%=name%>_copyn(P<%=name%>Element r, P<%=name%>Element a, int n) +{ + std::memcpy(r, a, n * sizeof(<%=name%>Element)); +} + +static inline int has_add32_overflow(int64_t val) +{ + int64_t signs = (val >> 31) & 0x3; + + return signs == 1 || signs == 2; +} + +static inline int <%=name%>_rawSSub(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a - b; + + return has_add32_overflow(*r); +} + +static inline void sub_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int64_t diff; + + int overflow = <%=name%>_rawSSub(&diff, a->shortVal, b->shortVal); + + if (overflow) + { + <%=name%>_rawCopyS2L(r->longVal, diff); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + else + { + r->type = <%=name%>_SHORT; + r->shortVal = (int32_t)diff; + } +} + +static inline void sub_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element a_m; + <%=name%>_toMontgomery(&a_m, a); + + <%=name%>_rawSub(r->longVal, a_m.longVal, b->longVal); +} + +static inline void sub_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element b_m; + <%=name%>_toMontgomery(&b_m, b); + + <%=name%>_rawSub(r->longVal, a->longVal, b_m.longVal); +} + +static inline void sub_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (a->shortVal >= 0) + { + <%=name%>_rawSubSL(r->longVal, a->shortVal, 
b->longVal); + } + else + { + int64_t a_shortVal = a->shortVal; + <%=name%>_rawNegLS(r->longVal, b->longVal, -a_shortVal); + } +} + +static inline void sub_l1ms2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element b_m; + <%=name%>_toMontgomery(&b_m, b); + + <%=name%>_rawSub(r->longVal, a->longVal, b_m.longVal); +} + +static inline void sub_s1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element a_m; + <%=name%>_toMontgomery(&a_m, a); + + <%=name%>_rawSub(r->longVal, a_m.longVal, b->longVal); +} + +static inline void sub_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + <%=name%>_rawAddLS(r->longVal, a->longVal, -b_shortVal); + } + else + { + <%=name%>_rawSubLS(r->longVal, a->longVal, b->shortVal); + } +} + +static inline void sub_l1ms2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_s1ml2m(P<%=name%>Element r,P<%=name%>Element a,P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawSub(r->longVal, a->longVal, b->longVal); +} + +void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + sub_l1ml2m(r, a, b); + } + else + { + sub_l1ml2n(r, a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + sub_l1nl2m(r, a, b); + } + else + { + sub_l1nl2n(r, a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + sub_l1ms2m(r, a, b); + } + else + { + sub_l1ms2n(r, a, b); + } + } + else + { + sub_l1ns2(r, a, b); + } 
+ } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + if (a->type & <%=name%>_MONTGOMERY) + { + sub_s1ml2m(r,a,b); + } + else + { + sub_s1nl2m(r,a,b); + } + } + else + { + sub_s1l2n(r,a,b); + } + } + else + { + sub_s1s2(r, a, b); + } +} + +static inline int <%=name%>_rawSAdd(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a + b; + + return has_add32_overflow(*r); +} + +static inline void add_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int64_t sum; + + int overflow = <%=name%>_rawSAdd(&sum, a->shortVal, b->shortVal); + + if (overflow) + { + <%=name%>_rawCopyS2L(r->longVal, sum); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + else + { + r->type = <%=name%>_SHORT; + r->shortVal = (int32_t)sum; + } +} + +static inline void add_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element a_m; + <%=name%>_toMontgomery(&a_m, a); + + <%=name%>_rawAdd(r->longVal, a_m.longVal, b->longVal); +} + +static inline void add_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element b_m; + <%=name%>_toMontgomery(&b_m, b); + + <%=name%>_rawAdd(r->longVal, a->longVal, b_m.longVal); +} + +static inline void add_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (a->shortVal >= 0) + { + <%=name%>_rawAddLS(r->longVal, b->longVal, a->shortVal); + } + else + { + int64_t a_shortVal = a->shortVal; + <%=name%>_rawSubLS(r->longVal, b->longVal, 
-a_shortVal); + } +} + +static inline void add_l1ms2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_m; + + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_toMontgomery(&b_m, b); + + <%=name%>_rawAdd(r->longVal, a->longVal, b_m.longVal); +} + +static inline void add_s1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>Element m_a; + <%=name%>_toMontgomery(&m_a, a); + + <%=name%>_rawAdd(r->longVal, m_a.longVal, b->longVal); +} + +static inline void add_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + if (b->shortVal >= 0) + { + <%=name%>_rawAddLS(r->longVal, a->longVal, b->shortVal); + } + else + { + int64_t b_shortVal = b->shortVal; + <%=name%>_rawSubLS(r->longVal, a->longVal, -b_shortVal); + } +} + +static inline void add_l1ms2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_s1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONGMONTGOMERY; + + <%=name%>_rawAdd(r->longVal, a->longVal, b->longVal); +} + +void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + add_l1ml2m(r, a, b); + } + else + { + add_l1ml2n(r, a, b); + } + } + else + { + if (b->type & <%=name%>_MONTGOMERY) + { + add_l1nl2m(r, a, b); + } + else + { + add_l1nl2n(r, a, b); + } + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + add_l1ms2m(r, a, b); + } + else + { + add_l1ms2n(r, a, b); + } + } + else + { + add_l1ns2(r, a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + if 
(a->type & <%=name%>_MONTGOMERY) + { + add_s1ml2m(r, a, b); + } + else + { + add_s1nl2m(r, a, b); + } + } + else + { + add_s1l2n(r, a, b); + } + } + else + { + add_s1s2(r, a, b); + } +} + +int <%=name%>_isTrue(P<%=name%>Element pE) +{ + int result; + + if (pE->type & <%=name%>_LONG) + { + result = !<%=name%>_rawIsZero(pE->longVal); + } + else + { + result = pE->shortVal != 0; + } + + return result; +} + +int <%=name%>_longNeg(P<%=name%>Element pE) +{ + if(<%=name%>_rawCmp(pE->longVal, <%=name%>_q.longVal) >= 0) + { + <%=name%>_longErr(); + return 0; + } + + int64_t result = pE->longVal[0] - <%=name%>_q.longVal[0]; + + int64_t is_long = (result >> 31) + 1; + + if(is_long) + { + <%=name%>_longErr(); + return 0; + } + + return result; +} + +int <%=name%>_longNormal(P<%=name%>Element pE) +{ + uint64_t is_long = 0; + uint64_t result; + + result = pE->longVal[0]; + + is_long = result >> 31; + + if (is_long) + { + return <%=name%>_longNeg(pE); + } + + if (memcmp(&pE->longVal[1], zero, (sizeof(pE->longVal) - sizeof(pE->longVal[0])))) + { + return <%=name%>_longNeg(pE); + } + + return result; +} + +// Convert a 64 bit integer to a long format field element +int <%=name%>_toInt(P<%=name%>Element pE) +{ + int result; + + if (pE->type & <%=name%>_LONG) + { + if (pE->type & <%=name%>_MONTGOMERY) + { + <%=name%>Element e_n; + <%=name%>_toNormal(&e_n, pE); + + result = <%=name%>_longNormal(&e_n); + } + else + { + result = <%=name%>_longNormal(pE); + } + } + else + { + result = pE->shortVal; + } + + return result; +} + +static inline int rlt_s1s2(P<%=name%>Element a, P<%=name%>Element b) +{ + return (a->shortVal < b->shortVal) ? 1 : 0; +} + +static inline int rltRawL1L2(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(pRawB, pRawA); + + return result > 0 ? 
1 : 0; +} + +static inline int rltl1l2_n1(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawB); + + if (result < 0) + { + return rltRawL1L2(pRawA, pRawB); + } + + return 1; +} + +static inline int rltl1l2_p1(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawB); + + if (result < 0) + { + return 0; + } + + return rltRawL1L2(pRawA, pRawB); +} + +static inline int rltL1L2(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawA); + + if (result < 0) + { + return rltl1l2_n1(pRawA, pRawB); + } + + return rltl1l2_p1(pRawA, pRawB); +} + +static inline int rlt_l1nl2n(P<%=name%>Element a, P<%=name%>Element b) +{ + return rltL1L2(a->longVal, b->longVal); +} + +static inline int rlt_l1nl2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_n; + + <%=name%>_toNormal(&b_n, b); + + return rltL1L2(a->longVal, b_n.longVal); +} + +static inline int rlt_l1ml2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + return rltL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rlt_l1ml2n(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + + <%=name%>_toNormal(&a_n, a); + + return rltL1L2(a_n.longVal, b->longVal); +} + +static inline int rlt_s1l2n(P<%=name%>Element a,P<%=name%>Element b) +{ + <%=name%>Element a_n; + + <%=name%>_toLongNormal(&a_n,a); + + return rltL1L2(a_n.longVal, b->longVal); +} + +static inline int rlt_l1ms2(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_ln; + + <%=name%>_toLongNormal(&b_ln ,b); + <%=name%>_toNormal(&a_n, a); + + return rltL1L2(a_n.longVal, b_ln.longVal); +} + +static inline int rlt_s1l2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&a_n, a); + 
<%=name%>_toNormal(&b_n, b); + + return rltL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rlt_l1ns2(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&b_n, b); + + return rltL1L2(a->longVal, b_n.longVal); +} + +int32_t <%=name%>_rlt(P<%=name%>Element a, P<%=name%>Element b) +{ + int32_t result; + + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = rlt_l1ml2m(a, b); + } + else + { + result = rlt_l1ml2n(a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + result = rlt_l1nl2m(a, b); + } + else + { + result = rlt_l1nl2n(a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + result = rlt_l1ms2(a, b); + } + else + { + result = rlt_l1ns2(a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = rlt_s1l2m(a,b); + } + else + { + result = rlt_s1l2n(a,b); + } + } + else + { + result = rlt_s1s2(a, b); + } + + return result; +} + +void <%=name%>_lt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->shortVal = <%=name%>_rlt(a, b); + r->type = <%=name%>_SHORT; +} + +void <%=name%>_geq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int32_t result = <%=name%>_rlt(a, b); + result ^= 0x1; + + r->shortVal = result; + r->type = <%=name%>_SHORT; +} + +static inline int <%=name%>_rawSNeg(int64_t *r, int32_t a) +{ + *r = -(int64_t)a; + + return has_add32_overflow(*r); +} + +void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type & <%=name%>_LONG) + { + r->type = a->type; + r->shortVal = a->shortVal; + <%=name%>_rawNeg(r->longVal, a->longVal); + } + else + { + int64_t a_shortVal; + + int overflow = <%=name%>_rawSNeg(&a_shortVal, a->shortVal); + + if (overflow) + { + <%=name%>_rawCopyS2L(r->longVal, a_shortVal); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + else + { + r->type = 
<%=name%>_SHORT; + r->shortVal = (int32_t)a_shortVal; + } + } +} + +static inline int reqL1L2(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + return <%=name%>_rawCmp(pRawB, pRawA) == 0; +} + +static inline int req_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + return (a->shortVal == b->shortVal) ? 1 : 0; +} + +static inline int req_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + return reqL1L2(a->longVal, b->longVal); +} + +static inline int req_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_m; + <%=name%>_toMontgomery(&a_m, a); + + return reqL1L2(a_m.longVal, b->longVal); +} + +static inline int req_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + return reqL1L2(a->longVal, b->longVal); +} + +static inline int req_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_m; + <%=name%>_toMontgomery(&b_m, b); + + return reqL1L2(a->longVal, b_m.longVal); +} + +static inline int req_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>_toLongNormal(&a_n, a); + + return reqL1L2(a_n.longVal, b->longVal); +} + +static inline int req_l1ms2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_m; + <%=name%>_toMontgomery(&b_m, b); + + return reqL1L2(a->longVal, b_m.longVal); +} + +static inline int req_s1l2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_m; + <%=name%>_toMontgomery(&a_m, a); + + return reqL1L2(a_m.longVal, b->longVal); +} + +static inline int req_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_n; + <%=name%>_toLongNormal(&b_n, b); + + return reqL1L2(a->longVal, b_n.longVal); +} + +// Compares two elements of any kind +int <%=name%>_req(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int result; + 
+ if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = req_l1ml2m(r, a, b); + } + else + { + result = req_l1ml2n(r, a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + result = req_l1nl2m(r, a, b); + } + else + { + result = req_l1nl2n(r, a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + result = req_l1ms2(r, a, b); + } + else + { + result = req_l1ns2(r, a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = req_s1l2m(r, a, b); + } + else + { + result = req_s1l2n(r, a, b); + } + } + else + { + result = req_s1s2(r, a, b); + } + + return result; +} + +void <%=name%>_eq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->shortVal = <%=name%>_req(r, a, b); + r->type = <%=name%>_SHORT; +} + +void <%=name%>_neq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int result = <%=name%>_req(r, a, b); + + r->shortVal = result ^ 0x1; + r->type = <%=name%>_SHORT; +} + +// Logical or between two elements +void <%=name%>_lor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int32_t is_true_a; + + if (a->type & <%=name%>_LONG) + { + is_true_a = !<%=name%>_rawIsZero(a->longVal); + } + else + { + is_true_a = a->shortVal ? 1 : 0; + } + + int32_t is_true_b; + + if (b->type & <%=name%>_LONG) + { + is_true_b = !<%=name%>_rawIsZero(b->longVal); + } + else + { + is_true_b = b->shortVal ? 1 : 0; + } + + r->shortVal = is_true_a | is_true_b; + r->type = <%=name%>_SHORT; +} + +void <%=name%>_lnot(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type & <%=name%>_LONG) + { + r->shortVal = <%=name%>_rawIsZero(a->longVal); + } + else + { + r->shortVal = a->shortVal ? 0 : 1; + } + + r->type = <%=name%>_SHORT; +} + + +static inline int rgt_s1s2(P<%=name%>Element a, P<%=name%>Element b) +{ + return (a->shortVal > b->shortVal) ? 
1 : 0; +} + +static inline int rgtRawL1L2(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(pRawB, pRawA); + + return (result < 0) ? 1 : 0; +} + +static inline int rgtl1l2_n1(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawB); + + if (result < 0) + { + return rgtRawL1L2(pRawA, pRawB); + } + return 0; +} + +static inline int rgtl1l2_p1(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawB); + + if (result < 0) + { + return 1; + } + return rgtRawL1L2(pRawA, pRawB); +} + +static inline int rgtL1L2(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + int result = <%=name%>_rawCmp(half, pRawA); + + if (result < 0) + { + return rgtl1l2_n1(pRawA, pRawB); + } + + return rgtl1l2_p1(pRawA, pRawB); +} + +static inline int rgt_l1nl2n(P<%=name%>Element a, P<%=name%>Element b) +{ + return rgtL1L2(a->longVal, b->longVal); +} + +static inline int rgt_l1nl2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_n; + <%=name%>_toNormal(&b_n, b); + + return rgtL1L2(a->longVal, b_n.longVal); +} + +static inline int rgt_l1ml2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rgt_l1ml2n(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>_toNormal(&a_n, a); + + return rgtL1L2(a_n.longVal, b->longVal); +} + +static inline int rgt_s1l2n(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>_toLongNormal(&a_n, a); + + return rgtL1L2(a_n.longVal, b->longVal); +} + +static inline int rgt_l1ms2(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toLongNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static 
inline int rgt_s1l2m(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rgt_l1ns2(P<%=name%>Element a, P<%=name%>Element b) +{ + <%=name%>Element b_n; + <%=name%>_toLongNormal(&b_n, b); + + return rgtL1L2(a->longVal, b_n.longVal); +} + +int <%=name%>_rgt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int result = 0; + + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = rgt_l1ml2m(a, b); + } + else + { + result = rgt_l1ml2n(a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + result = rgt_l1nl2m(a, b); + } + else + { + result = rgt_l1nl2n(a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + result = rgt_l1ms2(a, b); + } + else + { + result = rgt_l1ns2(a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + result = rgt_s1l2m(a, b); + } + else + { + result = rgt_s1l2n(a,b); + } + } + else + { + result = rgt_s1s2(a, b); + } + + return result; +} + +void <%=name%>_gt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->shortVal = <%=name%>_rgt(r, a, b); + r->type = <%=name%>_SHORT; +} + +void <%=name%>_leq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int32_t result = <%=name%>_rgt(r, a, b); + result ^= 0x1; + + r->shortVal = result; + r->type = <%=name%>_SHORT; +} + +// Logical and between two elements +void <%=name%>_land(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + int32_t is_true_a; + + if (a->type & <%=name%>_LONG) + { + is_true_a = !<%=name%>_rawIsZero(a->longVal); + } + else + { + is_true_a = a->shortVal ? 
1 : 0; + } + + int32_t is_true_b; + + if (b->type & <%=name%>_LONG) + { + is_true_b = !<%=name%>_rawIsZero(b->longVal); + } + else + { + is_true_b = b->shortVal ? 1 : 0; + } + + r->shortVal = is_true_a & is_true_b; + r->type = <%=name%>_SHORT; +} + +static inline void and_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + int32_t result = a->shortVal & b->shortVal; + r->shortVal = result; + r->type = <%=name%>_SHORT; + return; + } + + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&a_n, a); + <%=name%>_toLongNormal(&b_n, b); + + <%=name%>_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawAnd(r->longVal, a->longVal, b->longVal); +} + +static inline void and_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawAnd(r->longVal, a->longVal, b_n.longVal); +} + +static inline void and_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>_toNormal(&a_n, a); + + <%=name%>_rawAnd(r->longVal, a_n.longVal, b->longVal); +} + +static inline void and_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + 
<%=name%>_rawAnd(r->longVal, a_n.longVal, b->longVal); +} + +static inline void and_l1ms2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + <%=name%>_rawAnd(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void and_s1l2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + <%=name%>_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + <%=name%>_rawAnd(r->longVal, a->longVal, b_n.longVal); +} + +// Ands two elements of any kind +void <%=name%>_band(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + and_l1ml2m(r, a, b); + } + else + { + and_l1ml2n(r, a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + and_l1nl2m(r, a, b); + } + else + { + and_l1nl2n(r, a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + and_l1ms2(r, a, b); + } + else + { + and_l1ns2(r, a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + and_s1l2m(r, a, b); + } + else + { + and_s1l2n(r, a, b); + } + } + else + { + and_s1s2(r, a, b); + } +} + +void 
<%=name%>_rawZero(<%=name%>RawElement pRawResult) +{ + std::memset(pRawResult, 0, sizeof(<%=name%>RawElement)); +} + +static inline void rawShl(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b) +{ + if (b == 0) + { + <%=name%>_rawCopy(r, a); + return; + } + + if (b >= <%= q.bitLength() %>) + { + <%=name%>_rawZero(r); + return; + } + + <%=name%>_rawShl(r, a, b); +} + +static inline void rawShr(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b) +{ + if (b == 0) + { + <%=name%>_rawCopy(r, a); + return; + } + + if (b >= <%= q.bitLength() %>) + { + <%=name%>_rawZero(r); + return; + } + + <%=name%>_rawShr(r,a, b); +} + +static inline void <%=name%>_setzero(P<%=name%>Element r) +{ + r->type = 0; + r->shortVal = 0; +} + +static inline void do_shlcl(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + <%=name%>Element a_long; + <%=name%>_toLongNormal(&a_long, a); + + r->type = <%=name%>_LONG; + rawShl(r->longVal, a_long.longVal, b); +} + +static inline void do_shlln(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + r->type = <%=name%>_LONG; + rawShl(r->longVal, a->longVal, b); +} + +static inline void do_shl(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + if (a->type & <%=name%>_LONG) + { + if (a->type == <%=name%>_LONGMONTGOMERY) + { + <%=name%>Element a_long; + <%=name%>_toNormal(&a_long, a); + + do_shlln(r, &a_long, b); + } + else + { + do_shlln(r, a, b); + } + } + else + { + int64_t a_shortVal = a->shortVal; + + if (a_shortVal == 0) + { + <%=name%>_setzero(r); + } + else if (a_shortVal < 0) + { + do_shlcl(r, a, b); + } + else if(b >= 31) + { + do_shlcl(r, a, b); + } + else + { + a_shortVal <<= b; + + const uint64_t a_is_over_short = a_shortVal >> 31; + + if (a_is_over_short) + { + do_shlcl(r, a, b); + } + else + { + r->type = <%=name%>_SHORT; + r->shortVal = a_shortVal; + } + } + } +} + +static inline void do_shrln(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + r->type = <%=name%>_LONG; + rawShr(r->longVal, 
a->longVal, b); +} + +static inline void do_shrl(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + if (a->type == <%=name%>_LONGMONTGOMERY) + { + <%=name%>Element a_long; + <%=name%>_toNormal(&a_long, a); + + do_shrln(r, &a_long, b); + } + else + { + do_shrln(r, a, b); + } +} + +static inline void do_shr(P<%=name%>Element r, P<%=name%>Element a, uint64_t b) +{ + if (a->type & <%=name%>_LONG) + { + do_shrl(r, a, b); + } + else + { + int64_t a_shortVal = a->shortVal; + + if (a_shortVal == 0) + { + <%=name%>_setzero(r); + } + else if (a_shortVal < 0) + { + <%=name%>Element a_long; + <%=name%>_toLongNormal(&a_long, a); + + do_shrl(r, &a_long, b); + } + else if(b >= 31) + { + <%=name%>_setzero(r); + } + else + { + a_shortVal >>= b; + + r->shortVal = a_shortVal; + r->type = <%=name%>_SHORT; + } + } +} + +static inline void <%=name%>_shr_big_shift(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + static <%=name%>RawElement max_shift = {<%= q.bitLength() %>}; + + <%=name%>RawElement shift; + + <%=name%>_rawSubRegular(shift, <%=name%>_q.longVal, b->longVal); + + if (<%=name%>_rawCmp(shift, max_shift) >= 0) + { + <%=name%>_setzero(r); + } + else + { + do_shl(r, a, shift[0]); + } +} + +static inline void <%=name%>_shr_long(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + static <%=name%>RawElement max_shift = {<%= q.bitLength() %>}; + + if (<%=name%>_rawCmp(b->longVal, max_shift) >= 0) + { + <%=name%>_shr_big_shift(r, a, b); + } + else + { + do_shr(r, a, b->longVal[0]); + } +} + +void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (b->type & <%=name%>_LONG) + { + if (b->type == <%=name%>_LONGMONTGOMERY) + { + <%=name%>Element b_long; + <%=name%>_toNormal(&b_long, b); + + <%=name%>_shr_long(r, a, &b_long); + } + else + { + <%=name%>_shr_long(r, a, b); + } + } + else + { + int64_t b_shortVal = b->shortVal; + + if (b_shortVal < 0) + { + b_shortVal = -b_shortVal; + + if (b_shortVal >= <%= 
q.bitLength() %>) + { + <%=name%>_setzero(r); + } + else + { + do_shl(r, a, b_shortVal); + } + } + else if (b_shortVal >= <%= q.bitLength() %>) + { + <%=name%>_setzero(r); + } + else + { + do_shr(r, a, b_shortVal); + } + } +} + +static inline void <%=name%>_shl_big_shift(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + static <%=name%>RawElement max_shift = {<%= q.bitLength() %>}; + + <%=name%>RawElement shift; + + <%=name%>_rawSubRegular(shift, <%=name%>_q.longVal, b->longVal); + + if (<%=name%>_rawCmp(shift, max_shift) >= 0) + { + <%=name%>_setzero(r); + } + else + { + do_shr(r, a, shift[0]); + } +} + +static inline void <%=name%>_shl_long(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + static <%=name%>RawElement max_shift = {<%= q.bitLength() %>}; + + if (<%=name%>_rawCmp(b->longVal, max_shift) >= 0) + { + <%=name%>_shl_big_shift(r, a, b); + } + else + { + do_shl(r, a, b->longVal[0]); + } +} + +void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (b->type & <%=name%>_LONG) + { + if (b->type == <%=name%>_LONGMONTGOMERY) + { + <%=name%>Element b_long; + <%=name%>_toNormal(&b_long, b); + + <%=name%>_shl_long(r, a, &b_long); + } + else + { + <%=name%>_shl_long(r, a, b); + } + } + else + { + int64_t b_shortVal = b->shortVal; + + if (b_shortVal < 0) + { + b_shortVal = -b_shortVal; + + if (b_shortVal >= <%= q.bitLength() %>) + { + <%=name%>_setzero(r); + } + else + { + do_shr(r, a, b_shortVal); + } + } + else if (b_shortVal >= <%= q.bitLength() %>) + { + <%=name%>_setzero(r); + } + else + { + do_shl(r, a, b_shortVal); + } + } +} + +void <%=name%>_square(P<%=name%>Element r, P<%=name%>Element a) +{ + if (a->type & <%=name%>_LONG) + { + if (a->type == <%=name%>_LONGMONTGOMERY) + { + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawMSquare(r->longVal, a->longVal); + } + else + { + r->type = <%=name%>_LONGMONTGOMERY; + <%=name%>_rawMSquare(r->longVal, a->longVal); + <%=name%>_rawMMul(r->longVal, 
r->longVal, <%=name%>_R3.longVal); + } + } + else + { + int64_t result; + + int overflow = <%=name%>_rawSMul(&result, a->shortVal, a->shortVal); + + if (overflow) + { + <%=name%>_rawCopyS2L(r->longVal, result); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + else + { + // done the same way as in intel asm implementation + r->shortVal = (int32_t)result; + r->type = <%=name%>_SHORT; + // + + <%=name%>_rawCopyS2L(r->longVal, result); + r->type = <%=name%>_LONG; + r->shortVal = 0; + } + } +} + +static inline void or_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + r->shortVal = a->shortVal | b->shortVal; + r->type = <%=name%>_SHORT; + return; + } + + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&a_n, a); + <%=name%>_toLongNormal(&b_n, b); + + <%=name%>_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void or_s1l2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + <%=name%>_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void or_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + <%=name%>_rawOr(r->longVal, a_n.longVal, b->longVal); +} + +static inline void or_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + <%=name%>_rawOr(r->longVal, a->longVal, 
b_n.longVal); +} + +static inline void or_l1ms2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + <%=name%>_rawOr(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void or_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawOr(r->longVal, a->longVal, b->longVal); +} + +static inline void or_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawOr(r->longVal, a->longVal, b_n.longVal); +} + +static inline void or_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>_toNormal(&a_n, a); + + <%=name%>_rawOr(r->longVal, a_n.longVal, b->longVal); +} + +static inline void or_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + + +void <%=name%>_bor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + or_l1ml2m(r, a, b); + } + else + { + or_l1ml2n(r, a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + or_l1nl2m(r, a, b); + } + else + { + or_l1nl2n(r, a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + or_l1ms2(r, a, b); + } + else + { + or_l1ns2(r, a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & 
<%=name%>_MONTGOMERY) + { + or_s1l2m(r, a, b); + } + else + { + or_s1l2n(r, a, b); + } + } + else + { + or_s1s2(r, a, b); + } +} + +static inline void xor_s1s2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + r->shortVal = a->shortVal ^ b->shortVal; + r->type = <%=name%>_SHORT; + return; + } + + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toLongNormal(&a_n, a); + <%=name%>_toLongNormal(&b_n, b); + + <%=name%>_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void xor_s1l2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + <%=name%>_rawXor(r->longVal, a_n.longVal, b->longVal); +} + +static inline void xor_s1l2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&a_n, a); + } + + <%=name%>_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void xor_l1ns2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + <%=name%>_rawXor(r->longVal, a->longVal, b_n.longVal); +} + +static inline void xor_l1ms2(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal}}; + } + else + { + <%=name%>_toLongNormal(&b_n, b); + } + + 
<%=name%>_rawXor(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void xor_l1nl2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + <%=name%>_rawXor(r->longVal, a->longVal, b->longVal); +} + +static inline void xor_l1nl2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element b_n; + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawXor(r->longVal, a->longVal, b_n.longVal); +} + +static inline void xor_l1ml2n(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>_toNormal(&a_n, a); + + <%=name%>_rawXor(r->longVal, a_n.longVal, b->longVal); +} + +static inline void xor_l1ml2m(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + r->type = <%=name%>_LONG; + + <%=name%>Element a_n; + <%=name%>Element b_n; + + <%=name%>_toNormal(&a_n, a); + <%=name%>_toNormal(&b_n, b); + + <%=name%>_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +void <%=name%>_bxor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) +{ + if (a->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + if (b->type & <%=name%>_MONTGOMERY) + { + xor_l1ml2m(r, a, b); + } + else + { + xor_l1ml2n(r, a, b); + } + } + else if (b->type & <%=name%>_MONTGOMERY) + { + xor_l1nl2m(r, a, b); + } + else + { + xor_l1nl2n(r, a, b); + } + } + else if (a->type & <%=name%>_MONTGOMERY) + { + xor_l1ms2(r, a, b); + } + else + { + xor_l1ns2(r, a, b); + } + } + else if (b->type & <%=name%>_LONG) + { + if (b->type & <%=name%>_MONTGOMERY) + { + xor_s1l2m(r, a, b); + } + else + { + xor_s1l2n(r, a, b); + } + } + else + { + xor_s1s2(r, a, b); + } +} + +void <%=name%>_bnot(P<%=name%>Element r, P<%=name%>Element a) +{ + r->type = <%=name%>_LONG; + + if (a->type == <%=name%>_LONG) + { + if (a->type & <%=name%>_MONTGOMERY) + { + <%=name%>Element a_n; + 
<%=name%>_toNormal(&a_n, a); + + <%=name%>_rawNot(r->longVal, a_n.longVal); + } + else + { + <%=name%>_rawNot(r->longVal, a->longVal); + } + } + else + { + <%=name%>Element a_n; + <%=name%>_toLongNormal(&a_n, a); + + <%=name%>_rawNot(r->longVal, a_n.longVal); + } +} diff --git a/src/fr_raw_arm64.s.ejs b/src/fr_raw_arm64.s.ejs new file mode 100644 index 0000000..3e119f0 --- /dev/null +++ b/src/fr_raw_arm64.s.ejs @@ -0,0 +1,57 @@ + .global <%=name%>_rawAdd + .global <%=name%>_rawAddLS + .global <%=name%>_rawSub + .global <%=name%>_rawSubRegular + .global <%=name%>_rawNeg + .global <%=name%>_rawNegLS + .global <%=name%>_rawSubSL + .global <%=name%>_rawSubLS + .global <%=name%>_rawMMul + .global <%=name%>_rawMMul1 + .global <%=name%>_rawFromMontgomery + .global <%=name%>_rawCopy + .global <%=name%>_rawSwap + .global <%=name%>_rawIsEq + .global <%=name%>_rawIsZero + .global <%=name%>_rawCopyS2L + .global <%=name%>_rawCmp + .global <%=name%>_rawAnd + .global <%=name%>_rawOr + .global <%=name%>_rawXor + .global <%=name%>_rawShr + .global <%=name%>_rawShl + .global <%=name%>_rawNot + + .global _<%=name%>_rawAdd + .global _<%=name%>_rawAddLS + .global _<%=name%>_rawSub + .global _<%=name%>_rawSubRegular + .global _<%=name%>_rawNeg + .global _<%=name%>_rawNegLS + .global _<%=name%>_rawSubSL + .global _<%=name%>_rawSubLS + .global _<%=name%>_rawMMul + .global _<%=name%>_rawMMul1 + .global _<%=name%>_rawFromMontgomery + .global _<%=name%>_rawCopy + .global _<%=name%>_rawSwap + .global _<%=name%>_rawIsEq + .global _<%=name%>_rawIsZero + .global _<%=name%>_rawCopyS2L + .global _<%=name%>_rawCmp + .global _<%=name%>_rawAnd + .global _<%=name%>_rawOr + .global _<%=name%>_rawXor + .global _<%=name%>_rawShr + .global _<%=name%>_rawShl + .global _<%=name%>_rawNot + + .text + .align 4 + +<%= armBuilder.genFuncs(name, q) %> + + .align 8 +<%=name%>_rawq: .quad <%= constantElement(q) %> +<%=name%>_np: .quad 0x<%= 
(bigInt.one.shiftLeft(64)).minus(q.modInv(bigInt.one.shiftLeft(64))).toString(16) %> +<%=name%>_lboMask: .quad 0x<%= bigInt("10000000000000000",16).shiftRight(n64*64 - q.bitLength()).minus(bigInt.one).toString(16) %> diff --git a/src/fr_raw_generic.cpp.ejs b/src/fr_raw_generic.cpp.ejs new file mode 100644 index 0000000..e13bbb1 --- /dev/null +++ b/src/fr_raw_generic.cpp.ejs @@ -0,0 +1,362 @@ +#include "<%=name.toLowerCase()+"_element.hpp"%>" +#include +#include + +static uint64_t <%=name%>_rawq[] = {<%= constantElement(q) %>, 0}; +static <%=name%>RawElement <%=name%>_rawR2 = {<%= constantElement(bigInt.one.shiftLeft(n64*64*2).mod(q)) %>}; +static uint64_t <%=name%>_np = 0x<%= (bigInt.one.shiftLeft(64)).minus(q.modInv(bigInt.one.shiftLeft(64))).toString(16) %>; +static uint64_t lboMask = 0x<%= bigInt("10000000000000000",16).shiftRight(n64*64 - q.bitLength()).minus(bigInt.one).toString(16) %>; +static <%=name%>RawElement zero = {0}; + + +void <%=name%>_rawAdd(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB) +{ + uint64_t carry = mpn_add_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + if(carry || mpn_cmp(pRawResult, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawAddLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB) +{ + uint64_t carry = mpn_add_1(pRawResult, pRawA, <%=name%>_N64, rawB); + + if(carry || mpn_cmp(pRawResult, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawSub(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB) +{ + uint64_t carry = mpn_sub_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawSubRegular(<%=name%>RawElement pRawResult, <%=name%>RawElement 
pRawA, <%=name%>RawElement pRawB) +{ + mpn_sub_n(pRawResult, pRawA, pRawB, <%=name%>_N64); +} + +void <%=name%>_rawSubSL(<%=name%>RawElement pRawResult, uint64_t rawA, <%=name%>RawElement pRawB) +{ + <%=name%>RawElement pRawA = {rawA}; + + uint64_t carry = mpn_sub_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawSubLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB) +{ + uint64_t carry = mpn_sub_1(pRawResult, pRawA, <%=name%>_N64, rawB); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawNeg(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA) +{ + if (mpn_cmp(pRawA, zero, <%=name%>_N64) != 0) + { + mpn_sub_n(pRawResult, <%=name%>_rawq, pRawA, <%=name%>_N64); + } + else + { + mpn_copyi(pRawResult, zero, <%=name%>_N64); + } +} + +// Substracts a long element and a short element form 0 +void <%=name%>_rawNegLS(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, uint64_t rawB) +{ + uint64_t carry1 = mpn_sub_1(pRawResult, <%=name%>_rawq, <%=name%>_N64, rawB); + uint64_t carry2 = mpn_sub_n(pRawResult, pRawResult, pRawA, <%=name%>_N64); + + if (carry1 || carry2) + { + mpn_add_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawCopy(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA) +{ + memcpy(pRawResult, pRawA, sizeof(<%=name%>RawElement)); +} + +int <%=name%>_rawIsEq(const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB) +{ + return mpn_cmp(pRawA, pRawB, <%=name%>_N64) == 0; +} + +void <%=name%>_rawMMul(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, const <%=name%>RawElement pRawB) +{ + const mp_size_t N = <%=name%>_N64+1; + const uint64_t *mq = <%=name%>_rawq; +<% if (!canOptimizeConsensys) { %> + uint64_t c = 0; +<% } -%> + uint64_t np0; +<% for (let i=0; i + uint64_t 
product<%=i%>[N] = {0}; +<% } %> + product0[N-1] = mpn_mul_1(product0, pRawB, <%=name%>_N64, pRawA[0]); +<% if (!canOptimizeConsensys) { -%> +<% for (let i=0; i + np0 = <%=name%>_np * product<%=i%>[0]; + product<%=i+1%>[N-1] += mpn_addmul_1(product<%=i%>, mq, N, np0); + + product<%=i+1%>[N-1] += mpn_addmul_1(product<%=i+1%>, pRawB, <%=name%>_N64, pRawA[<%=i+1%>]); +<% if (i == n64-2) { -%> + c = mpn_add(product<%=i+1%>, product<%=i+1%>, N, product<%=i%>+1, N-1); +<% } else { -%> + product<%=i+2%>[N-1] = mpn_add(product<%=i+1%>, product<%=i+1%>, N, product<%=i%>+1, N-1); +<% } -%> +<% } %> +<% } else { -%> +<% for (let i=0; i + np0 = <%=name%>_np * product<%=i%>[0]; + product<%=i+1%>[1] = mpn_addmul_1(product<%=i%>, mq, N, np0); + + product<%=i+1%>[N-1] = mpn_addmul_1(product<%=i+1%>, pRawB, <%=name%>_N64, pRawA[<%=i+1%>]); + mpn_add(product<%=i+1%>, product<%=i+1%>, N, product<%=i%>+1, N-1); +<% } %> +<% } -%> + np0 = <%=name%>_np * product<%=n64-1%>[0]; +<% if (!canOptimizeConsensys) { -%> + c += mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (c || mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% } else { -%> + mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% } -%> + { + mpn_sub_n(pRawResult, pRawResult, mq, <%=name%>_N64); + } +} + +void <%=name%>_rawMSquare(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA) +{ + <%=name%>_rawMMul(pRawResult, pRawA, pRawA); +} + +void <%=name%>_rawMMul1(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA, uint64_t pRawB) +{ + const mp_size_t N = <%=name%>_N64+1; + const uint64_t *mq = <%=name%>_rawq; +<% if (!canOptimizeConsensys) { %> + uint64_t c = 0; +<% } -%> + uint64_t np0; +<% for (let i=0; i + uint64_t product<%=i%>[N] = {0}; +<% } %> + product0[N-1] = mpn_mul_1(product0, pRawA, <%=name%>_N64, pRawB); +<% for (let 
i=0; i + np0 = <%=name%>_np * product<%=i%>[0]; +<% if (!canOptimizeConsensys) { -%> + product<%=i+1%>[N-1] = mpn_addmul_1(product<%=i%>, mq, N, np0); +<% } else { -%> + product<%=i+1%>[1] = mpn_addmul_1(product<%=i%>, mq, N, np0); +<% } -%> + mpn_add(product<%=i+1%>, product<%=i+1%>, N, product<%=i%>+1, N-1); +<% } %> + np0 = <%=name%>_np * product<%=n64-1%>[0]; +<% if (!canOptimizeConsensys) { -%> + c = mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (c || mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% } else { -%> + mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% } -%> + { + mpn_sub_n(pRawResult, pRawResult, mq, <%=name%>_N64); + } +} + +void <%=name%>_rawToMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA) +{ + <%=name%>_rawMMul(pRawResult, pRawA, <%=name%>_rawR2); +} + +void <%=name%>_rawFromMontgomery(<%=name%>RawElement pRawResult, const <%=name%>RawElement pRawA) +{ + const mp_size_t N = <%=name%>_N64+1; + const uint64_t *mq = <%=name%>_rawq; +<% if (!canOptimizeConsensys) { %> + uint64_t c = 0; +<% } -%> + uint64_t np0; + uint64_t product0[N]; +<% for (let i=1; i + uint64_t product<%=i%>[N] = {0}; +<% } %> + mpn_copyi(product0, pRawA, <%=name%>_N64); product0[N-1] = 0; +<% for (let i=0; i + np0 = <%=name%>_np * product<%=i%>[0]; +<% if (!canOptimizeConsensys) { -%> + product<%=i+1%>[N-1] = mpn_addmul_1(product<%=i%>, mq, N, np0); +<% } else { -%> + product<%=i+1%>[1] = mpn_addmul_1(product<%=i%>, mq, N, np0); +<% } -%> + mpn_add(product<%=i+1%>, product<%=i+1%>, N, product<%=i%>+1, N-1); +<% } %> + np0 = <%=name%>_np * product<%=n64-1%>[0]; +<% if (!canOptimizeConsensys) { -%> + c = mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (c || mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% 
} else { -%> + mpn_addmul_1(product<%=n64-1%>, mq, N, np0); + + mpn_copyi(pRawResult, product<%=n64-1%>+1, <%=name%>_N64); + + if (mpn_cmp(pRawResult, mq, <%=name%>_N64) >= 0) +<% } -%> + { + mpn_sub_n(pRawResult, pRawResult, mq, <%=name%>_N64); + } +} + +int <%=name%>_rawIsZero(const <%=name%>RawElement rawA) +{ + return mpn_zero_p(rawA, <%=name%>_N64) ? 1 : 0; +} + +int <%=name%>_rawCmp(<%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + return mpn_cmp(pRawA, pRawB, <%=name%>_N64); +} + +void <%=name%>_rawSwap(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA) +{ + <%=name%>RawElement temp; + + <%=name%>_rawCopy(temp, pRawResult); + <%=name%>_rawCopy(pRawResult, pRawA); + <%=name%>_rawCopy(pRawA, temp); +} + +void <%=name%>_rawCopyS2L(<%=name%>RawElement pRawResult, int64_t val) +{ + pRawResult[0] = val; +<% for (let i=1; i + pRawResult[<%= i %>] = 0;<% } %> + + if (val < 0) { +<% for (let i=1; i + pRawResult[<%= i %>] = -1;<% } %> + + mpn_add_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawAnd(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + mpn_and_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + pRawResult[<%= n64 - 1 %>] &= lboMask; + + if (mpn_cmp(pRawResult, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawOr(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + mpn_ior_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + pRawResult[<%= n64 - 1 %>] &= lboMask; + + if (mpn_cmp(pRawResult, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawXor(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA, <%=name%>RawElement pRawB) +{ + mpn_xor_n(pRawResult, pRawA, pRawB, <%=name%>_N64); + + pRawResult[<%= n64 - 1 %>] &= lboMask; + + if (mpn_cmp(pRawResult, 
<%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawShl(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b) +{ + uint64_t bit_shift = b % 64; + uint64_t word_shift = b / 64; + uint64_t word_count = <%=name%>_N64 - word_shift; + + mpn_copyi(r + word_shift, a, word_count); + std::memset(r, 0, word_shift * sizeof(uint64_t)); + + if (bit_shift) + { + mpn_lshift(r, r, <%=name%>_N64, bit_shift); + } + + r[<%= n64 - 1 %>] &= lboMask; + + if (mpn_cmp(r, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(r, r, <%=name%>_rawq, <%=name%>_N64); + } +} + +void <%=name%>_rawShr(<%=name%>RawElement r, <%=name%>RawElement a, uint64_t b) +{ + const uint64_t bit_shift = b % 64; + const uint64_t word_shift = b / 64; + const uint64_t word_count = <%=name%>_N64 - word_shift; + + mpn_copyi(r, a + word_shift, word_count); + std::memset(r + word_count, 0, word_shift * sizeof(uint64_t)); + + if (bit_shift) + { + mpn_rshift(r, r, <%=name%>_N64, bit_shift); + } +} + +void <%=name%>_rawNot(<%=name%>RawElement pRawResult, <%=name%>RawElement pRawA) +{ + mpn_com(pRawResult, pRawA, <%=name%>_N64); + + pRawResult[<%= n64 - 1 %>] &= lboMask; + + if (mpn_cmp(pRawResult, <%=name%>_rawq, <%=name%>_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, <%=name%>_rawq, <%=name%>_N64); + } +}