Learn practical skills, build real-world projects, and advance your career
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import re
pd.options.mode.chained_assignment = None

import numpy as np
from catboost import CatBoostClassifier, Pool
from sklearn import metrics
def printInfo(data):
    """Print a short summary of ``data``.

    Writes four items to stdout, in order: the object's type, its
    ``shape`` attribute, a dashed separator line, and the result of
    slicing the first five entries (``data[:5]``).

    Args:
        data: Any object exposing ``.shape`` and supporting slice
            indexing (e.g. a pandas DataFrame/Series or a NumPy array).
    """
    kind = type(data)
    print(kind)

    dims = data.shape
    print(dims)

    separator = "---------------------"
    print(separator)

    # Slice (rather than call a pandas-specific helper) so that any
    # sliceable container with a shape is accepted.
    preview = data[:5]
    print(preview)
# Read the CSV file into a DataFrame, then print its type, shape and
# first five rows.
df = pd.read_csv(
    filepath_or_buffer='api_call_hist_normalize/api_call_hist_normalize.csv',
)
printInfo(df)
<class 'pandas.core.frame.DataFrame'> (20330, 310) --------------------- Unnamed: 0 sha1 \ 0 0 0005e3eba4ccabb43cffcd101a1c2424ab8b08fb 1 1 0006a13f031d83a5a7b577ebe88b1f7e51aed1be 2 2 000b8ecf81f3b917b0dc243fb66c6b3e0b8f2ec2 3 3 000befa6a9ebb2d63e9fe9aa3812e1512a2f1321 4 4 000deee4291dcb91cee866eaa298f5a6abb26e5b label SetUnhandledExceptionFilter \ 0 Trojan:Win32/Fuerboos.C!cl 0.0 1 Trojan:Win32/Emotet!rfn 13.0 2 Trojan:Win32/Fuery.A!cl 5.0 3 VirTool:Win32/VBInject 0.0 4 Trojan:Win32/Emotet.AC!bit 0.0 UnhookWindowsHookEx accept bind closesocket \ 0 0.0 0.0 0.0 0.0 1 0.0 0.0 12.0 0.0 2 0.0 0.0 0.0 0.0 3 0.0 0.0 0.0 0.0 4 0.0 0.0 0.0 0.0 CoInternetSetFeatureEnabled connect ... CryptEnumProvidersW \ 0 0.0 0.0 ... 0.0 1 0.0 0.0 ... 0.0 2 0.0 0.0 ... 0.0 3 0.0 0.0 ... 0.0 4 0.0 0.0 ... 0.0 CryptExportKey CryptHashData CryptGenKey CryptImportPublicKeyInfo \ 0 0.0 0.0 0.0 0.0 1 0.0 5.0 0.0 0.0 2 0.0 0.0 0.0 0.0 3 0.0 0.0 0.0 0.0 4 0.0 0.0 0.0 0.0 HTTPSCertificateTrust HTTPSFinalProv CDocument_write \ 0 0.0 0.0 0.0 1 0.0 0.0 0.0 2 0.0 0.0 0.0 3 0.0 0.0 0.0 4 0.0 0.0 0.0 COleScript_ParseScriptText JsEval 0 0.0 0.0 1 0.0 0.0 2 0.0 0.0 3 0.0 0.0 4 0.0 0.0 [5 rows x 310 columns]
# Count how often each value of the 'label' column occurs and keep the
# first 20 entries of that count table (value_counts sorts by count,
# descending, by default), then print a summary of the result.
top_20 = df['label'].value_counts().head(20)
printInfo(top_20)
<class 'pandas.core.series.Series'> (20,) --------------------- Trojan:Win32/Emotet.AC!bit 4067 Trojan:Win32/Fuerboos.C!cl 1614 Trojan:Win32/Emotet!rfn 960 Trojan:Win32/Emotet.PA!MTB 873 Trojan:Win32/Emotet.LK!ml 576 Trojan:Win32/Occamy.C 519 Trojan:Win32/Dynamer!rfn 445 Trojan:Win32/Tiggre!plock 331 Trojan:Win32/Skeeyah.A!rfn 305 VirTool:Win32/VBInject 300 Trojan:Win32/Sonbokli.A!cl 279 Trojan:HTML/Brocoiner.A!lib 279 Trojan:Win32/Azden.A!cl 277 Trojan:Win32/Fuery.C!cl 270 Trojan:Win32/Fuerboos.A!cl 264 Trojan:Win32/MereTam.A 259 Trojan:Win32/Casdet!rfn 245 VirTool:Win32/VBInject.OX!bit 242 VirTool:Win32/VBInject.ACS!bit 218 Trojan:Win32/Emali.A!cl 218 Name: label, dtype: int64
# Show the index of top_20, i.e. the 'label' values that were counted.
print(top_20.index)
Index(['Trojan:Win32/Emotet.AC!bit', 'Trojan:Win32/Fuerboos.C!cl', 'Trojan:Win32/Emotet!rfn', 'Trojan:Win32/Emotet.PA!MTB', 'Trojan:Win32/Emotet.LK!ml', 'Trojan:Win32/Occamy.C', 'Trojan:Win32/Dynamer!rfn', 'Trojan:Win32/Tiggre!plock', 'Trojan:Win32/Skeeyah.A!rfn', 'VirTool:Win32/VBInject', 'Trojan:Win32/Sonbokli.A!cl', 'Trojan:HTML/Brocoiner.A!lib', 'Trojan:Win32/Azden.A!cl', 'Trojan:Win32/Fuery.C!cl', 'Trojan:Win32/Fuerboos.A!cl', 'Trojan:Win32/MereTam.A', 'Trojan:Win32/Casdet!rfn', 'VirTool:Win32/VBInject.OX!bit', 'VirTool:Win32/VBInject.ACS!bit', 'Trojan:Win32/Emali.A!cl'], dtype='object')