Learn practical skills, build real-world projects, and advance your career
Created 2 years ago
import numpy as np
import sys
global OPS
OPS = 0
global OPS2
OPS2 = 0
def _mul(a, b):
global OPS
OPS += 1
return a * b
# OPS2 is the number of iterations for the new solution
def _mul2(a, b):
global OPS2
OPS2 += 1
return a * b
def _add(i,j,M,B):
global OPS2
OPS2 += 1
s=M[j,i-1]
if j>=i :
if 2*i-j==1:
s+=B[i-1,j-i]
else :
s+=B[i-1,j-i]+B[j-i,i-1]
return s
# kernel2 is the new proposed algorithm, with an asymptotic complexity of O(N²)
def kernel2(A, B, Y, N):
global OPS2
M = np.zeros((2*(N-1)+1,2*(N-1)+1), dtype=int)
for i in range(1,N):
for j in range(1,2*i):
M[j,i]=_add(i,j,M,B)
for i in range(1,N):
for j in range(2*(i-1)+1):
Y[i] += _mul2(A[i,j], M[j+1,i])
# the first solution is conserved for the comparaison
def kernel(A, B, Y, N):
for i in range(N):
for j in range(i):
for k in range(i):
# Y[i] += A[i,j+k] * B[k,j]
Y[i] += _mul(A[i,j+k], B[k,j])
def main():
global OPS
global OPS2
# set default problem size or parse args
N = 250
#if len(sys.argv) > 1:
# N = int(sys.argv[1])
# initialize some random data
A = np.random.randint(0, 1000, size=(N,2*N))
B = np.random.randint(0, 1000, size=(N,N))
# result of the old solution
Y = np.zeros(N, dtype=int)
# result of the new solution
Y2 = np.zeros(N, dtype=int)
# main work
kernel(A, B, Y, N)
kernel2(A, B, Y2, N)
# print summary
# printing the difference between the two outputs
print(Y2-Y)
print('N =', N)
print('-> OPS =', OPS)
print('-> OPS2 =', OPS2)
if __name__ == '__main__':
main()
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
N = 250
-> OPS = 5177125
-> OPS2 = 124002