Learn practical skills, build real-world projects, and advance your career
import numpy as np
import sys

global OPS
OPS = 0


global OPS2
OPS2 = 0

def _mul(a, b):
    global OPS
    OPS += 1
    return a * b

# OPS2 is the number of iterations for the new solution

def _mul2(a, b):
    global OPS2
    OPS2 += 1
    return a * b

def _add(i,j,M,B):
    global OPS2
    OPS2 += 1
    s=M[j,i-1]
    if j>=i :
                if 2*i-j==1:
                    s+=B[i-1,j-i]
                else :
                    s+=B[i-1,j-i]+B[j-i,i-1]
    return s
    


# kernel2 is the new proposed algorithm, with an asymptotic complexity of O(N²)
def kernel2(A, B, Y, N):
    global OPS2
    M = np.zeros((2*(N-1)+1,2*(N-1)+1), dtype=int)
    for i in range(1,N):
        for j in range(1,2*i):
            M[j,i]=_add(i,j,M,B)
    for i in range(1,N):
        for j in range(2*(i-1)+1):
            Y[i] += _mul2(A[i,j], M[j+1,i])
            
# the first solution is conserved for the comparaison 
def kernel(A, B, Y, N):

    for i in range(N):
        for j in range(i):
            for k in range(i):
                # Y[i] += A[i,j+k] * B[k,j]
                Y[i] += _mul(A[i,j+k], B[k,j])


def main():
    global OPS
    global OPS2

    # set default problem size or parse args
    N = 250
    #if len(sys.argv) > 1:
     #   N = int(sys.argv[1])

    # initialize some random data
    A = np.random.randint(0, 1000, size=(N,2*N))
    B = np.random.randint(0, 1000, size=(N,N))
    # result of the old solution
    Y = np.zeros(N, dtype=int)
    # result of the new solution 
    Y2 = np.zeros(N, dtype=int)

    # main work
    kernel(A, B, Y, N)
    kernel2(A, B, Y2, N)

    # print summary
    # printing the difference between the two outputs 
    print(Y2-Y)
    print('N =', N)
    print('-> OPS =', OPS)
    print('-> OPS2 =', OPS2)


if __name__ == '__main__':
    main()
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] N = 250 -> OPS = 5177125 -> OPS2 = 124002