Jovian
⭐️
Sign In
In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import re
import numpy as np
pd.options.mode.chained_assignment = None
In [2]:
df = pd.read_csv('malware-classifier-ipynb-b918e/api_call_hist_normalize/api_call_hist_normalize.csv')
In [3]:
X = df.loc[:, 'SetUnhandledExceptionFilter':'JsEval'].values
y_str = df.loc[:, 'label'].values
y_str_cleaned = [re.split('[\.\!]', labels.split(',')[-1])[0] for labels in y_str ]

id = df.loc[:,'sha1'].values
print(type(X))

X = np.column_stack((id,X))
print(type(X))


label_encoder = LabelEncoder()
label_encoder.fit(y_str_cleaned)
y = label_encoder.transform(y_str_cleaned)

DATASET_SIZE = len(X)
print(DATASET_SIZE)
<class 'numpy.ndarray'> <class 'numpy.ndarray'> 20330
In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)
X_train, X_val, y_train, y_val   = train_test_split(X_train, y_train, test_size=0.2)

In [13]:
import tensorflow as tf

def Transfer2TfData(data, label, shuffle=True, batch_size=32):    
    id = data[:,0]
    _data = data[:,1:]
    ds_data = tf.data.Dataset.from_tensor_slices((dict(names = id.tolist(), formats = _data.tolist()), label))
    
    if shuffle:
        ds_data.shuffle(buffer_size = len(data))
    ds_data = ds_data.batch(batch_size)
    return ds_data

ds_train = Transfer2TfData(X_train, y_train, True)
In [11]:
ds_val = Transfer2TfData(X_val, y_val, True)
ds_test = Transfer2TfData(X_test, y_test, True)
In [22]:
from tensorflow import feature_column
from tensorflow.keras import layers

feature_columns = []
myheader = list(df)[3:]
for column_name in myheader:
    feature_columns.append(feature_column.numeric_column(column_name))
In [23]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
In [25]:
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(1000, activation='relu'),
  layers.Dense(1000, activation='relu'),
  layers.Dense(237, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

ds_train = Transfer2TfData(X_train, y_train, True)
ds_val = Transfer2TfData(X_val, y_val, True)
ds_test = Transfer2TfData(X_test, y_test, True)

model.fit(ds_train,
          validation_data=ds_val,
          epochs=10)
WARNING: Logging before flag parsing goes to stderr. W0506 23:33:35.107796 3064 training_utils.py:1353] Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.
Epoch 1/10
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-25-65c35c324ab5> in <module> 16 model.fit(ds_train, 17 validation_data=ds_val, ---> 18 epochs=10) F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 789 workers=0, 790 shuffle=shuffle, --> 791 initial_epoch=initial_epoch) 792 793 # Case 3: Symbolic tensors or Numpy array-like. F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch) 1513 shuffle=shuffle, 1514 initial_epoch=initial_epoch, -> 1515 steps_name='steps_per_epoch') 1516 1517 def evaluate_generator(self, F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs) 255 256 is_deferred = not model._is_compiled --> 257 batch_outs = batch_function(*batch_data) 258 if not isinstance(batch_outs, list): 259 batch_outs = [batch_outs] F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics) 1236 x, y, sample_weights = self._standardize_user_data( 1237 x, y, sample_weight=sample_weight, class_weight=class_weight, -> 1238 extract_tensors_from_dataset=True) 1239 1240 if self.run_eagerly: F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset) 2501 else: 2502 cast_inputs = x_input -> 2503 self._set_inputs(cast_inputs) 2504 else: 2505 y_input = y F:\Python36\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs) 454 self._setattr_tracking = False # pylint: disable=protected-access 455 try: --> 456 result = method(self, *args, **kwargs) 457 finally: 458 self._setattr_tracking = previous_value # pylint: disable=protected-access F:\Python36\lib\site-packages\tensorflow\python\keras\engine\training.py in _set_inputs(self, inputs, outputs, training) 2771 self, '_contains_symbolic_tensors', False) 2772 if self._expects_training_arg: -> 2773 outputs = self.call(inputs, training=training) 2774 else: 2775 outputs = self.call(inputs) F:\Python36\lib\site-packages\tensorflow\python\keras\engine\sequential.py in call(self, inputs, training, mask) 254 kwargs['training'] = training 255 --> 256 outputs = layer(inputs, **kwargs) 257 258 # `outputs` will be the inputs to the next layer. F:\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs) 610 base_layer_utils.AutoAddUpdates(self, 611 inputs)) as auto_updater: --> 612 outputs = self.call(inputs, *args, **kwargs) 613 auto_updater.set_outputs(outputs) 614 F:\Python36\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py in call(self, features, cols_to_output_tensors) 471 with ops.name_scope(column.name): 472 tensor = column.get_dense_tensor(transformation_cache, --> 473 self._state_manager) 474 processed_tensors = self._process_dense_tensor(column, tensor) 475 if cols_to_output_tensors is not None: F:\Python36\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py in get_dense_tensor(self, transformation_cache, state_manager) 2808 # Feature has been already transformed. Return the intermediate 2809 # representation created by _transform_feature. -> 2810 return transformation_cache.get(self, state_manager) 2811 2812 @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, F:\Python36\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py in get(self, key, state_manager) 2571 column = key 2572 logging.debug('Transforming feature_column %s.', column) -> 2573 transformed = column.transform_feature(self, state_manager) 2574 if transformed is None: 2575 raise ValueError('Column {} is not supported.'.format(column.name)) F:\Python36\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py in transform_feature(self, transformation_cache, state_manager) 2780 ValueError: If a SparseTensor is passed in. 2781 """ -> 2782 input_tensor = transformation_cache.get(self.key, state_manager) 2783 return self._transform_input_tensor(input_tensor) 2784 F:\Python36\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py in get(self, key, state_manager) 2563 2564 if isinstance(key, six.string_types): -> 2565 raise ValueError('Feature {} is not in features dictionary.'.format(key)) 2566 2567 if not isinstance(key, FeatureColumn): ValueError: Feature CDocument_write is not in features dictionary.
In [ ]:
import jovian
jovian.commit()
[jovian] Saving notebook..