aboutsummaryrefslogtreecommitdiff
path: root/ext/ipp/sources/ippcp/src/pcpbnuarith.h
blob: eaca1a21249b572a076201665d25f57264eeb457 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/*############################################################################
  # Copyright 1999-2018 Intel Corporation
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #     http://www.apache.org/licenses/LICENSE-2.0
  #
  # Unless required by applicable law or agreed to in writing, software
  # distributed under the License is distributed on an "AS IS" BASIS,
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
  ############################################################################*/

/* 
//  Purpose:
//     Intel(R) Integrated Performance Primitives.
//     Internal Unsigned internal arithmetic
// 
// 
*/

#if !defined(_CP_BNU_ARITH_H)
#define _CP_BNU_ARITH_H

#include "pcpbnuimpl.h"
#include "pcpbnu32arith.h"
#include "pcpmulbnukara.h"

#define     cpAdd_BNU OWNAPI(cpAdd_BNU)
BNU_CHUNK_T cpAdd_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, cpSize ns);
#define     cpSub_BNU OWNAPI(cpSub_BNU)
BNU_CHUNK_T cpSub_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, cpSize ns);
#define     cpInc_BNU OWNAPI(cpInc_BNU)
BNU_CHUNK_T cpInc_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize ns, BNU_CHUNK_T val);
#define     cpDec_BNU OWNAPI(cpDec_BNU)
BNU_CHUNK_T cpDec_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize ns, BNU_CHUNK_T val);

#if defined(_USE_KARATSUBA_)
#define     cpAddAdd_BNU OWNAPI(cpAddAdd_BNU)
BNU_CHUNK_T cpAddAdd_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pC, cpSize size);
#define     cpAddSub_BNU OWNAPI(cpAddSub_BNU)
BNU_CHUNK_T cpAddSub_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, const BNU_CHUNK_T* pB, const BNU_CHUNK_T* pC, cpSize size);
#endif

#define     cpAddMulDgt_BNU OWNAPI(cpAddMulDgt_BNU)
BNU_CHUNK_T cpAddMulDgt_BNU(BNU_CHUNK_T* pR, const BNU_CHUNK_T* pA, cpSize ns, BNU_CHUNK_T val);


#define     cpMulAdc_BNU_school OWNAPI(cpMulAdc_BNU_school)
BNU_CHUNK_T cpMulAdc_BNU_school(BNU_CHUNK_T* pR,
                         const BNU_CHUNK_T* pA, cpSize nsA,
                         const BNU_CHUNK_T* pB, cpSize nsB);
#define     cpMulAdx_BNU_school OWNAPI(cpMulAdx_BNU_school)
BNU_CHUNK_T cpMulAdx_BNU_school(BNU_CHUNK_T* pR,
                         const BNU_CHUNK_T* pA, cpSize nsA,
                         const BNU_CHUNK_T* pB, cpSize nsB);

__INLINE BNU_CHUNK_T cpMul_BNU_school(BNU_CHUNK_T* pR,
                                const BNU_CHUNK_T* pA, cpSize nsA,
                                const BNU_CHUNK_T* pB, cpSize nsB)
{
#if(_ADCOX_NI_ENABLING_==_FEATURE_ON_)
   return cpMulAdx_BNU_school(pR, pA,nsA, pB,nsB);
#elif(_ADCOX_NI_ENABLING_==_FEATURE_TICKTOCK_)
   return IsFeatureEnabled(ADCOX_ENABLED)? cpMulAdx_BNU_school(pR, pA,nsA, pB,nsB)
                                         : cpMulAdc_BNU_school(pR, pA,nsA, pB,nsB);
#else
   return cpMulAdc_BNU_school(pR, pA,nsA, pB,nsB);
#endif
}


#define     cpSqrAdc_BNU_school OWNAPI(cpSqrAdc_BNU_school)
BNU_CHUNK_T cpSqrAdc_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA);

#define     cpSqrAdx_BNU_school OWNAPI(cpSqrAdx_BNU_school)
BNU_CHUNK_T cpSqrAdx_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA);

__INLINE BNU_CHUNK_T cpSqr_BNU_school(BNU_CHUNK_T * pR, const BNU_CHUNK_T * pA, cpSize nsA)
{
#if(_ADCOX_NI_ENABLING_==_FEATURE_ON_)
   return cpSqrAdx_BNU_school(pR, pA,nsA);
#elif(_ADCOX_NI_ENABLING_==_FEATURE_TICKTOCK_)
   return IsFeatureEnabled(ADCOX_ENABLED)? cpSqrAdx_BNU_school(pR, pA,nsA)
                                         : cpSqrAdc_BNU_school(pR, pA,nsA);
#else
   return cpSqrAdc_BNU_school(pR, pA,nsA);
#endif
}

#define     cpGcd_BNU OWNAPI(cpGcd_BNU)
BNU_CHUNK_T cpGcd_BNU(BNU_CHUNK_T a, BNU_CHUNK_T b);

#define cpModInv_BNU OWNAPI(cpModInv_BNU)
int     cpModInv_BNU(BNU_CHUNK_T* pInv,
               const BNU_CHUNK_T* pA, cpSize nsA,
               const BNU_CHUNK_T* pM, cpSize nsM,
                     BNU_CHUNK_T* bufInv, BNU_CHUNK_T* bufA, BNU_CHUNK_T* bufM);


/*
// multiplication/squaring wrappers
*/
__INLINE cpSize cpMul_BNU_BufferSize(cpSize opLen)
{
#if !defined (_USE_KARATSUBA_)
   UNREFERENCED_PARAMETER(opLen);
   return 0;
#else
   return cpKaratsubaBufferSize(opLen);
#endif
}
__INLINE BNU_CHUNK_T cpMul_BNU(BNU_CHUNK_T* pR,
                         const BNU_CHUNK_T* pA, cpSize nsA,
                         const BNU_CHUNK_T* pB, cpSize nsB,
                               BNU_CHUNK_T* pBuffer)
{
#if !defined(_USE_KARATSUBA_)
   UNREFERENCED_PARAMETER(pBuffer);
   return cpMul_BNU_school(pR, pA,nsA, pB,nsB);
#else
   if(nsA!=nsB || nsA<CP_KARATSUBA_MUL_THRESHOLD || !pBuffer)
      return cpMul_BNU_school(pR, pA,nsA, pB,nsB);
   else
      return cpMul_BNU_karatsuba(pR, pA,pB,nsA, pBuffer);
#endif
}
__INLINE BNU_CHUNK_T cpSqr_BNU(BNU_CHUNK_T * pR,
                         const BNU_CHUNK_T * pA, cpSize nsA,
                               BNU_CHUNK_T* pBuffer)
{
#if !defined(_USE_KARATSUBA_)
   UNREFERENCED_PARAMETER(pBuffer);
   return cpSqr_BNU_school(pR, pA,nsA);
#else
   if(nsA<CP_KARATSUBA_SQR_THRESHOLD || !pBuffer)
      return cpSqr_BNU_school(pR, pA,nsA);
   else
      return cpSqr_BNU_karatsuba(pR, pA,nsA, pBuffer);
#endif
}

/*
// division/reduction wrappers
*/
__INLINE cpSize cpDiv_BNU(BNU_CHUNK_T* pQ, cpSize* pnsQ, BNU_CHUNK_T* pA, cpSize nsA, BNU_CHUNK_T* pB, cpSize nsB)
{
   int nsR = cpDiv_BNU32((Ipp32u*)pQ, pnsQ,
                         (Ipp32u*)pA, nsA*(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u)),
                         (Ipp32u*)pB, nsB*(sizeof(BNU_CHUNK_T)/sizeof(Ipp32u)));
   #if (BNU_CHUNK_BITS == BNU_CHUNK_64BIT)
   if(nsR&1) ((Ipp32u*)pA)[nsR] = 0;
   nsR = INTERNAL_BNU_LENGTH(nsR);
   if(pQ) {
      if(*pnsQ&1) ((Ipp32u*)pQ)[*pnsQ] = 0;
      *pnsQ = INTERNAL_BNU_LENGTH(*pnsQ);
   }
   #endif
   return nsR;
}

__INLINE cpSize cpMod_BNU(BNU_CHUNK_T* pX, cpSize nsX, BNU_CHUNK_T* pModulus, cpSize nsM)
{
   return cpDiv_BNU(NULL,NULL, pX,nsX, pModulus, nsM);
}

#endif /* _CP_BNU_ARITH_H */