RNN with and without a loop: what does each output mean?

The code below is my basic RNN:

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, device):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.device = device

        # num_classes is not used yet; the module only wraps an nn.GRU
        self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        # Start every forward pass from a zero hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(self.device)
        out, ht = self.rnn(x, h0)
        return out, ht
# Initializing the RNN
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rnn = RNN(28, 128, 1, 10, device)

First, I passed the whole input to the rnn without a loop:

# Without a loop
inps = torch.randn((32,100,28))
out_wl,ht_wl = rnn(inps)
print("Output shape : ", out_wl.shape) #Output shape :  torch.Size([32, 100, 128])
print("Hidden shape : ", ht_wl.shape)  #Hidden shape :  torch.Size([1, 32, 128])

Then, instead of passing all of the input at once, I passed one timestep (row) at a time:

# With a loop
outs = []
hns = []
for inp in inps:
    inp = inp.unsqueeze(0)       # inp has shape (100, 28); add a batch dimension -> (1, 100, 28)
    out_temp, hn_temp = rnn(inp)
    outs.append(out_temp)
    hns.append(hn_temp)

outs = torch.stack(outs)
hns = torch.stack(hns)

print(outs.shape) #torch.Size([32, 1, 100, 128])
print(hns.shape) #torch.Size([32, 1, 1, 128])

outs = outs.squeeze(1)
hns = hns.squeeze(1)

print(outs.shape) #torch.Size([32, 100, 128])
print(hns.shape) #torch.Size([32, 1, 128])

But when I compare the outputs from the looped and non-looped versions, they are not the same:

out_wl == outs
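
To see whether the difference might only be floating-point noise, an approximate comparison could be used instead of exact elementwise equality (a minimal sketch with torch.equal and torch.allclose):

# `==` compares element by element and is very strict for floats.
print(torch.equal(out_wl, outs))                 # exact, bitwise-identical match
print(torch.allclose(out_wl, outs, atol=1e-6))   # approximate match within a small tolerance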

Can anyone help me with the following?

  • What is the actual difference between the two outputs of the RNN? Is out_wl just h_t from every timestep, i.e. is the last timestep of out_wl equal to h_t?

  • When I loop over the input, why is out (with loop) not the same as out_wl (without loop)? For reference, the sketch after this list shows what I mean by feeding one timestep at a time while carrying the hidden state forward.
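
This is a hypothetical sketch of processing one timestep at a time (it calls the inner nn.GRU directly and carries the hidden state forward manually, since my forward() always restarts from a zero hidden state; the names step_in, step_out, and h are just illustrative):

# Hypothetical: feed one timestep at a time and carry the hidden state forward
h = torch.zeros(rnn.num_layers, inps.size(0), rnn.hidden_size)
step_outs = []
for t in range(inps.size(1)):            # iterate over the 100 timesteps
    step_in = inps[:, t:t+1, :]          # shape (32, 1, 28): all samples, one timestep
    step_out, h = rnn.rnn(step_in, h)    # reuse the wrapped nn.GRU with the running state
    step_outs.append(step_out)
step_outs = torch.cat(step_outs, dim=1)  # shape (32, 100, 128)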