Merge 8739e91fb8 into a910f2f385
commit 71f239a516
@@ -0,0 +1,6 @@
+.idea/*
+.vscode/*
+logs/*
+data/*
+.data/*
+__pycache__/*
@@ -1,4 +1,5 @@
 FROM nvidia/cuda:8.0-cudnn6-devel
+MAINTAINER snehil.wakchaure1@cerner.com
 
 ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
 
@@ -36,6 +37,7 @@ RUN conda install -y python=${python_version} && \
     pip install opencv-contrib-python && \
    conda install -y Pillow scikit-learn scikit-image graphviz pydot notebook pandas matplotlib mkl nose pyyaml six h5py && \
     pip install keras && \
+    pip install tensorflow && \
     conda clean -yt
 
 ENV PYTHONPATH='/src/:$PYTHONPATH'
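As a quick sanity check after `docker build`, the Python stack the Dockerfile installs can be verified from inside the container. This is a minimal sketch, not part of this commit; version output will vary with the pinned `python_version` and whichever keras/tensorflow releases pip resolves:

```python
# Run inside the container to confirm the libraries the Dockerfile installs.
import cv2                 # from opencv-contrib-python
import h5py
import keras               # prints "Using TensorFlow backend." on import
import tensorflow as tf

print('OpenCV     :', cv2.__version__)
print('h5py       :', h5py.__version__)
print('Keras      :', keras.__version__)
print('TensorFlow :', tf.__version__)
print('GPU        :', tf.test.gpu_device_name() or 'none visible')
```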
README.md
@@ -1,8 +1,6 @@
-# Abnormal Event Detection in Videos Using Spatiotemporal Autoencoder
-This repository hosts the codes for "Abnormal Event Detection in Videos Using Spatiotemporal Autoencoder".
-Paper can be found at [Springer](https://link.springer.com/chapter/10.1007/978-3-319-59081-3_23) and [arXiv](https://arxiv.org/abs/1701.01546).
+# Anomaly detection in Smart Audio Videos using a Convolutional Spatio-temporal Auto-encoder and Convolutional Long-Short Term Memory (LSTM) Recurrent Neural Network based architecture.
 
-Prerequisites:
+## Project dependencies:
 - keras
 - tensorflow
 - h5py
@@ -12,32 +10,37 @@ Prerequisites:
 - tqdm (for progressbar)
 - coloredlogs (optional, for colored terminal logs only)
 
-You can use the `Dockerfile` provided to build the environment then enter the environment using `nvidia-docker run --rm -it -v HOST_FOLDER:/share DOCKER_IMAGE bash`.
+To build the docker image, run the following docker command. This will take around 13GB of space on your machine, so make sure you clean up any unused docker images and containers to avoid out-of-free-space issues.
+
+```docker build -t snehil/video_anomaly_ai:v1 .```
 
-To train the model, just run `python start_train.py`. Default configuration can be found at `config.yml`. You need to prepare video dataset you plan to train/evaluate on. You may get the benchmark dataset videos from respective authors. For each dataset, put the training videos into `VIDEO_ROOT_PATH/DATASET_NAME/training_videos` and testing videos into `VIDEO_ROOT_PATH/DATASET_NAME/testing_videos`. Example structure of training videos for `avenue` dataset:
-- `VIDEO_ROOT_PATH/avenue/training_videos`
+You can then enter the environment using ```nvidia-docker run --rm -it -v HOST_FOLDER:/share DOCKER_IMAGE bash```.
+
+or run the following command:
+
+```docker run --rm -it -v ~/:/share snehil/<IMAGE_ID> bash```
+
+To log in to a running container:
+
+```docker attach <CONTAINER_ID>```
+
+## To train the model
+
+- Just run `python start_train.py`.
+- Default configuration can be found at `config.yml`.
+- You need to prepare the video dataset you plan to train/evaluate on (avi or mp4). For each dataset, put the training videos into ```./data/videos/training_videos``` and testing videos into ```./data/videos/testing_videos```. Example structure of training videos for the `avenue` dataset:
+- `VIDEO_ROOT_PATH/avenue/training_videos`
   - `01.avi`
   - `02.avi`
   - ...
   - `16.avi`
 
+## To test the model
 Once you have trained the model, you may now run `python start_test.py` after setting the parameters at the beginning of the file.
 
-Please cite the following paper if you use our code / paper:
+# Troubleshooting
 
+In case you get a "No free space left" error (in spite of having free space) while running the docker build command, try cleaning up unused docker images and containers using the following commands:
 ```
-@inbook{Chong2017,
-  author = {Chong, Yong Shean and
-            Tay, Yong Haur},
-  editor = {Cong, Fengyu and
-            Leung, Andrew and
-            Wei, Qinglai},
-  title = {Abnormal Event Detection in Videos Using Spatiotemporal Autoencoder},
-  bookTitle = {Advances in Neural Networks - ISNN 2017: 14th International Symposium, ISNN 2017, Sapporo, Hakodate, and Muroran, Hokkaido, Japan, June 21--26, 2017, Proceedings, Part II},
-  year = {2017},
-  publisher = {Springer International Publishing},
-  pages = {189--196},
-  isbn = {978-3-319-59081-3},
-  doi = {10.1007/978-3-319-59081-3_23},
-  url = {https://doi.org/10.1007/978-3-319-59081-3_23}
-}
+docker rm $(docker ps -q -f 'status=exited')
+docker rmi $(docker images -q -f "dangling=true")
 ```
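For reference, the dataset layout the README asks for can be prepared and checked with a short script. This is a sketch, not part of the commit: the `check_layout` helper is illustrative, it follows the `VIDEO_ROOT_PATH/DATASET_NAME/{training,testing}_videos` convention used by the code, and `avenue` is the example dataset name:

```python
import glob
import os

VIDEO_ROOT = './data/videos'

def check_layout(dataset):
    """Create the expected folders and count the clips in each split."""
    for split in ('training_videos', 'testing_videos'):
        folder = os.path.join(VIDEO_ROOT, dataset, split)
        os.makedirs(folder, exist_ok=True)
        clips = sorted(glob.glob(os.path.join(folder, '*.avi')) +
                       glob.glob(os.path.join(folder, '*.mp4')))
        print('{}: {} clip(s)'.format(folder, len(clips)))

check_layout('avenue')  # expects e.g. 01.avi ... 16.avi under training_videos
```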
classifier.py
@@ -1,36 +1,58 @@
-from dataset import preprocess_data
 import os
-from keras import backend as K
 import matplotlib
+from keras import backend as K
+from dataset import preprocess_data
 matplotlib.use('Agg')
 
 assert(K.image_data_format() == 'channels_last')
 
 
 def get_model(t):
     from keras.models import Model
     from keras.layers.convolutional import Conv2D, Conv2DTranspose
     from keras.layers.convolutional_recurrent import ConvLSTM2D
     from keras.layers.normalization import BatchNormalization
     from keras.layers.wrappers import TimeDistributed
     from keras.layers.core import Activation
     from keras.layers import Input
 
     input_tensor = Input(shape=(t, 224, 224, 1))
 
-    conv1 = TimeDistributed(Conv2D(128, kernel_size=(11, 11), padding='same', strides=(4, 4), name='conv1'),
-                            input_shape=(t, 224, 224, 1))(input_tensor)
-    conv1 = TimeDistributed(BatchNormalization())(conv1)
-    conv1 = TimeDistributed(Activation('relu'))(conv1)
+    # First Conv layer
+    conv1 = TimeDistributed(
+        Conv2D(
+            128,
+            kernel_size = (11, 11),
+            padding = 'same', # zero-padding
+            strides = (4, 4),
+            name = 'conv1'
+        ),
+        input_shape=(t, 224, 224, 1)
+    )(input_tensor)
 
-    conv2 = TimeDistributed(Conv2D(64, kernel_size=(5, 5), padding='same', strides=(2, 2), name='conv2'))(conv1)
-    conv2 = TimeDistributed(BatchNormalization())(conv2)
-    conv2 = TimeDistributed(Activation('relu'))(conv2)
+    conv1 = TimeDistributed(BatchNormalization())(conv1)
+    conv1 = TimeDistributed(Activation('relu'))(conv1)
+
+    # Second Conv layer
+    conv2 = TimeDistributed(
+        Conv2D(
+            64,
+            kernel_size = (5, 5),
+            padding = 'same', # zero-padding
+            strides = (2, 2),
+            name = 'conv2'
+        )
+    )(conv1)
+
+    conv2 = TimeDistributed(BatchNormalization())(conv2)
+    conv2 = TimeDistributed(Activation('relu'))(conv2)
+
+    # LSTM layers
     convlstm1 = ConvLSTM2D(64, kernel_size=(3, 3), padding='same', return_sequences=True, name='convlstm1')(conv2)
     convlstm2 = ConvLSTM2D(32, kernel_size=(3, 3), padding='same', return_sequences=True, name='convlstm2')(convlstm1)
     convlstm3 = ConvLSTM2D(64, kernel_size=(3, 3), padding='same', return_sequences=True, name='convlstm3')(convlstm2)
 
+    # De-convolution layers
     deconv1 = TimeDistributed(Conv2DTranspose(128, kernel_size=(5, 5), padding='same', strides=(2, 2), name='deconv1'))(convlstm3)
     deconv1 = TimeDistributed(BatchNormalization())(deconv1)
     deconv1 = TimeDistributed(Activation('relu'))(deconv1)
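Since every convolution above uses `padding='same'`, each layer only downsamples by its stride, so the spatial path through the encoder can be read directly off the strides in this hunk. A sketch of that arithmetic follows; the final stride-4 transpose is an assumption, since the diff is truncated before any layer past `deconv1`:

```python
import math

def same_pad_out(size, stride):
    # With 'same' padding, output size = ceil(input size / stride).
    return math.ceil(size / stride)

size = 224                    # frames enter as (t, 224, 224, 1)
size = same_pad_out(size, 4)  # conv1, stride 4 -> 56
size = same_pad_out(size, 2)  # conv2, stride 2 -> 28
                              # convlstm1..3 are stride 1 -> still 28
size *= 2                     # deconv1 transpose, stride 2 -> 56
size *= 4                     # assumed final stride-4 transpose -> 224
print(size)                   # 224, matching the input resolution
```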
@@ -55,28 +77,28 @@ def compile_model(model, loss, optimizer):
     model.compile(loss=loss, optimizer=opt)
 
 
-def train(dataset, job_folder, logger, video_root_path='/share/data/videos'):
+def train(dataset, job_folder, logger, video_root_path='./data/videos'):
     """Build and train the model
     """
     import yaml
     import numpy as np
     from keras.callbacks import ModelCheckpoint, EarlyStopping
     from custom_callback import LossHistory
     import matplotlib.pyplot as plt
     from keras.utils.io_utils import HDF5Matrix
 
     logger.debug("Loading configs from {}".format(os.path.join(job_folder, 'config.yml')))
 
     with open(os.path.join(job_folder, 'config.yml'), 'r') as ymlfile:
         cfg = yaml.load(ymlfile)
 
     nb_epoch = cfg['epochs']
     batch_size = cfg['batch_size']
     loss = cfg['cost']
     optimizer = cfg['optimizer']
     time_length = cfg['time_length']
-    # shuffle = cfg['shuffle']
 
-    logger.info("Building model of type {} and activation {}".format(model_type, activation))
+    # logger.info("Building model of type {} and activation {}".format(model_type, activation))
     model = get_model(time_length)
     logger.info("Compiling model with {} and {} optimizer".format(loss, optimizer))
     compile_model(model, loss, optimizer)
@@ -91,20 +113,21 @@ def train(dataset, job_folder, logger, video_root_path='/share/data/videos'):
     data = HDF5Matrix(os.path.join(video_root_path, '{0}/{0}_train_t{1}.h5'.format(dataset, time_length)),
                       'data')
 
     snapshot = ModelCheckpoint(os.path.join(job_folder,
                                'model_snapshot_e{epoch:03d}_{val_loss:.6f}.h5'))
     earlystop = EarlyStopping(patience=10)
     history_log = LossHistory(job_folder=job_folder, logger=logger)
 
     logger.info("Initializing training...")
 
     history = model.fit(
-        data, data,
-        batch_size=batch_size,
-        epochs=nb_epoch,
-        validation_split=0.15,
-        shuffle='batch',
-        callbacks=[snapshot, earlystop, history_log]
+        data, # X
+        data, # Y (Same as input for Auto-encoder)
+        batch_size = batch_size,
+        epochs = nb_epoch,
+        validation_split = 0.15,
+        shuffle = 'batch',
+        callbacks = [snapshot, earlystop, history_log]
     )
 
     logger.info("Training completed!")
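Because `fit` receives the same tensor as both X and Y, the network is trained purely to reconstruct its input, so anomaly scoring at test time reduces to reconstruction error. A sketch of the usual regularity-score normalization follows; this is an assumption about the scoring step, which this hunk does not show, and `errors` stands in for per-volume reconstruction errors:

```python
import numpy as np

def regularity_score(errors):
    """Map reconstruction errors e(t) to s(t) = 1 - (e(t) - min e) / (max e - min e),
    so well-reconstructed (regular) frames score near 1."""
    errors = np.asarray(errors, dtype=np.float64)
    span = errors.max() - errors.min()
    return 1.0 - (errors - errors.min()) / (span + 1e-8)

# e.g. errors[i] = np.mean((X_test[i] - model.predict(X_test[i:i+1])[0]) ** 2)
```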
@@ -130,9 +153,9 @@ def get_gt_vid(dataset, vid_idx, pred_vid):
     import numpy as np
 
     if dataset in ("indoor", "plaza", "lawn"):
-        gt_vid = np.load('/share/data/groundtruths/{0}_test_gt.npy'.format(dataset))
+        gt_vid = np.load('./data/groundtruths/{0}_test_gt.npy'.format(dataset))
     else:
-        gt_vid_raw = np.loadtxt('/share/data/groundtruths/gt_{0}_vid{1:02d}.txt'.format(dataset, vid_idx+1))
+        gt_vid_raw = np.loadtxt('./data/groundtruths/gt_{0}_vid{1:02d}.txt'.format(dataset, vid_idx+1))
         gt_vid = np.zeros_like(pred_vid)
 
     try:
@@ -198,33 +221,32 @@ def calc_auc_overall(logger, dataset, n_vid, save_path):
 
 
 def test(logger, dataset, t, job_uuid, epoch, val_loss, visualize_score=True, visualize_frame=False,
-         video_root_path='/share/data/videos'):
-    import numpy as np
-    from keras.models import load_model
+         video_root_path='./data/videos'):
     import os
     import h5py
-    from keras.utils.io_utils import HDF5Matrix
-    import matplotlib.pyplot as plt
-    from scipy.misc import imresize
+    import numpy as np
+    from keras.models import load_model
+    from keras.utils.io_utils import HDF5Matrix
+    import matplotlib.pyplot as plt
+    from scipy.misc import imresize
 
-    n_videos = {'avenue': 21, 'enter': 6, 'exit': 4, 'ped1': 36, 'ped2': 12}
+    n_videos = {'avenue': 1}
     test_dir = os.path.join(video_root_path, '{0}/testing_h5_t{1}'.format(dataset, t))
-    job_folder = os.path.join('/share/clean/{}/jobs'.format(dataset), job_uuid)
+    job_folder = os.path.join('./data/clean/{}/jobs'.format(dataset), job_uuid)
     model_filename = 'model_snapshot_e{:03d}_{:.6f}.h5'.format(epoch, val_loss)
     temporal_model = load_model(os.path.join(job_folder, model_filename))
     save_path = os.path.join(job_folder, 'result')
     os.makedirs(save_path, exist_ok=True)
 
 
     for videoid in range(n_videos[dataset]):
         videoname = '{0}_{1:02d}.h5'.format(dataset, videoid+1)
         filepath = os.path.join(test_dir, videoname)
         logger.info("==> {}".format(filepath))
         f = h5py.File(filepath, 'r')
         filesize = f['data'].shape[0]
         f.close()
 
-        gt_vid_raw = np.loadtxt('/share/data/groundtruths/gt_{0}_vid{1:02d}.txt'.format(dataset, videoid+1))
+        # gt_vid_raw = np.loadtxt('./data/groundtruths/gt_{0}_vid{1:02d}.txt'.format(dataset, videoid+1))
 
         logger.debug("Predicting using {}".format(os.path.join(job_folder, model_filename)))
         X_test = HDF5Matrix(filepath, 'data')
@@ -269,17 +291,17 @@ def test(logger, dataset, t, job_uuid, epoch, val_loss, visualize_score=True, visualize_frame=False,
         plt.ylim(0, 1)
         plt.xlim(1, score_vid.shape[0]+1)
 
-        try:
-            for event in range(gt_vid_raw.shape[1]):
-                start = int(gt_vid_raw[0, event])
-                end = int(gt_vid_raw[1, event]) + 1
-                gt_vid[start:end] = 1
-                plt.fill_between(np.arange(start, end), 0, 1, facecolor='red', alpha=0.4)
-        except IndexError:
-            start = int(gt_vid_raw[0])
-            end = int(gt_vid_raw[1])
-            gt_vid[start:end] = 1
-            plt.fill_between(np.arange(start, end), 0, 1, facecolor='red', alpha=0.4)
+        # try:
+        #     for event in range(gt_vid_raw.shape[1]):
+        #         start = int(gt_vid_raw[0, event])
+        #         end = int(gt_vid_raw[1, event]) + 1
+        #         gt_vid[start:end] = 1
+        #         plt.fill_between(np.arange(start, end), 0, 1, facecolor='red', alpha=0.4)
+        # except IndexError:
+        #     start = int(gt_vid_raw[0])
+        #     end = int(gt_vid_raw[1])
+        #     gt_vid[start:end] = 1
+        #     plt.fill_between(np.arange(start, end), 0, 1, facecolor='red', alpha=0.4)
 
         plt.savefig(os.path.join(save_path, 'scores_{0}_video_{1:02d}.png'.format(dataset, videoid+1)), dpi=300)
         plt.close()
@@ -305,5 +327,5 @@ def test(logger, dataset, t, job_uuid, epoch, val_loss, visualize_score=True, visualize_frame=False,
         plt.clf()
 
     logger.info("{}: Calculating overall metrics".format(dataset))
-    auc_overall, eer_overall = calc_auc_overall(logger, dataset, n_videos[dataset], save_path)
+    # auc_overall, eer_overall = calc_auc_overall(logger, dataset, n_videos[dataset], save_path)
@@ -1,5 +1,5 @@
-time_length: 8
+time_length: 4
 cost: mse
-epochs: 2000
+epochs: 2
 optimizer: sgd
 batch_size: 16
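One caveat for the `cfg = yaml.load(ymlfile)` call in `classifier.py` that reads this config: calling `yaml.load` without an explicit `Loader` has been deprecated since PyYAML 5.1, and PyYAML 6.0 makes the `Loader` argument mandatory. A sketch of the safer form, with the assertion values taken from this diff:

```python
import yaml

with open('config.yml', 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)  # explicit safe loader, no deprecation warning

assert cfg['time_length'] == 4 and cfg['epochs'] == 2
```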
@@ -3,12 +3,14 @@ import skvideo.io
 from skimage.transform import resize
 from skimage.io import imsave
 
-video_root_path = '/share/data/videos'
+video_root_path = './data/videos'
 size = (224, 224)
 
 def video_to_frame(dataset, train_or_test):
     video_path = os.path.join(video_root_path, dataset, '{}_videos'.format(train_or_test))
     frame_path = os.path.join(video_root_path, dataset, '{}_frames'.format(train_or_test))
+    print('video_path = ', video_path)
+    print('frame_path = ', frame_path)
     os.makedirs(frame_path, exist_ok=True)
 
     for video_file in os.listdir(video_path):
@@ -23,22 +25,6 @@ def video_to_frame(dataset, train_or_test):
             imsave(os.path.join(vid_frame_path, '{:05d}.jpg'.format(count)), image) # save frame as JPEG file
             count += 1
 
-# avenue
+# convert
 video_to_frame('avenue', 'training')
 video_to_frame('avenue', 'testing')
-
-# ped1
-video_to_frame('ped1', 'training')
-video_to_frame('ped1', 'testing')
-
-# ped2
-video_to_frame('ped2', 'training')
-video_to_frame('ped2', 'testing')
-
-# enter
-video_to_frame('enter', 'training')
-video_to_frame('enter', 'testing')
-
-# exit
-video_to_frame('exit', 'training')
-video_to_frame('exit', 'testing')
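The hunk above truncates the frame-extraction loop inside `video_to_frame`. Given the module's imports (`skvideo.io`, `skimage.transform.resize`, `skimage.io.imsave`) and the `imsave` line shown, the per-clip loop is presumably along these lines (a sketch; `extract_frames` is an illustrative name, not the repository's function):

```python
import os
import skvideo.io
from skimage.io import imsave
from skimage.transform import resize

def extract_frames(video_file, vid_frame_path, size=(224, 224)):
    """Resize each frame of one clip and save it as a numbered JPEG."""
    os.makedirs(vid_frame_path, exist_ok=True)
    count = 0
    for frame in skvideo.io.vreader(video_file):  # yields H x W x 3 uint8 frames
        image = resize(frame, size)               # float image in [0, 1]
        imsave(os.path.join(vid_frame_path, '{:05d}.jpg'.format(count)), image)
        count += 1
    return count
```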
dataset.py
@@ -1,5 +1,5 @@
 
-def calc_mean(dataset, video_root_path='/share/data/videos'):
+def calc_mean(dataset, video_root_path='./data/videos'):
     import os
     from skimage.io import imread
     import numpy as np
@@ -22,7 +22,7 @@ def calc_mean(dataset, video_root_path='/share/data/videos'):
     np.save(os.path.join(video_root_path, dataset, 'mean_frame_224.npy'), frame_mean)
 
 
-def subtract_mean(dataset, video_root_path='/share/data/videos'):
+def subtract_mean(dataset, video_root_path='./data/videos'):
     import os
     from skimage.io import imread
     import numpy as np
@@ -56,7 +56,7 @@ def subtract_mean(dataset, video_root_path='/share/data/videos'):
     np.save(os.path.join(video_root_path, dataset, 'testing_frames_{}.npy'.format(frame_folder)), testing_frames_vid)
 
 
-def build_h5(dataset, train_or_test, t, video_root_path='/share/data/videos'):
+def build_h5(dataset, train_or_test, t, video_root_path='./data/videos'):
     import h5py
     from tqdm import tqdm
     import os
@@ -87,7 +87,7 @@ def build_h5(dataset, train_or_test, t, video_root_path='/share/data/videos'):
     build_volume(train_or_test, num_videos, time_length=t)
 
 
-def combine_dataset(dataset, t, video_root_path='/share/data/videos'):
+def combine_dataset(dataset, t, video_root_path='./data/videos'):
     import h5py
     import os
     from tqdm import tqdm
@@ -121,7 +121,7 @@ def combine_dataset(dataset, t, video_root_path='/share/data/videos'):
     output_file.close()
 
 
-def preprocess_data(logger, dataset, t, video_root_path='/share/data/videos'):
+def preprocess_data(logger, dataset, t, video_root_path='./data/videos'):
     import os
 
     # Step 1: Calculate the mean frame of all training frames
@@ -167,6 +167,8 @@ def preprocess_data(logger, dataset, t, video_root_path='/share/data/videos'):
         num_videos = len(os.listdir(os.path.join(video_root_path, '{}/{}_frames'.format(dataset, train_or_test))))
         for i in range(num_videos):
             h5_file = os.path.join(video_root_path, '{0}/{1}_h5_t{2}/{0}_{3:02d}.h5'.format(dataset, train_or_test, t, i+1))
+            print('-------------------')
+            print(h5_file)
             assert(os.path.isfile(h5_file))
     except AssertionError:
         logger.info("Step 3/4: Generating volumes for {} {} set".format(dataset, train_or_test))
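The `calc_mean`/`subtract_mean` pair above is standard mean-frame normalization; stripped of the file I/O, the core computation is just the following (a numpy sketch, with `frames` standing in for the stacked 224x224 training frames):

```python
import numpy as np

frames = np.random.rand(100, 224, 224)  # stand-in for all training frames
mean_frame = frames.mean(axis=0)        # what calc_mean saves as mean_frame_224.npy
centered = frames - mean_frame          # what subtract_mean writes out per split
print(abs(centered.mean()) < 1e-12)     # True: frames are now zero-centered
```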
@@ -6,19 +6,21 @@ import coloredlogs
 from classifier import test
 
 
 device = 'cpu'
-dataset = 'ped1'
-job_uuid = '86f47b9c-d0ca-49a8-beb6-84373ea9e880'
-epoch = 586
-val_loss = 0.001069
-time_length = 8
+dataset = 'avenue'
+job_uuid = '603213fe-3308-41d7-8ce2-d734ea4a547b'
+epoch = 2
+val_loss = 0.001326
+time_length = 4
 
+job_folder = os.path.join('./data/clean/{}/jobs'.format(dataset), job_uuid)
+log_path = os.path.join(job_folder, 'logs')
 
-job_folder = os.path.join('/share/clean/{}/jobs'.format(dataset), job_uuid)
-log_path = os.path.join(job_folder, 'logs')
 os.makedirs(log_path, exist_ok=True)
-logging.basicConfig(filename=os.path.join(log_path, "test-{}.log".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))),
-                    level=logging.DEBUG,
-                    format="%(asctime)s [%(levelname)s] %(message)s")
+logging.basicConfig(filename = os.path.join(log_path, "test-{}.log".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))),
+                    level = logging.DEBUG,
+                    format = "%(asctime)s [%(levelname)s] %(message)s")
 coloredlogs.install()
 logger = logging.getLogger()
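Note that the hard-coded `epoch`/`val_loss` pair above must exactly match a checkpoint written by `ModelCheckpoint` (the `model_snapshot_e{epoch:03d}_{val_loss:.6f}.h5` pattern in `classifier.py`). A sketch that recovers the pair from the newest snapshot instead of copying it by hand (`latest_snapshot` is an illustrative helper, not part of the commit):

```python
import glob
import os
import re

def latest_snapshot(job_folder):
    """Return (epoch, val_loss) parsed from the most recent snapshot file."""
    names = sorted(glob.glob(os.path.join(job_folder, 'model_snapshot_e*.h5')),
                   key=os.path.getmtime)
    match = re.search(r'model_snapshot_e(\d{3})_([0-9.]+)\.h5$', names[-1])
    return int(match.group(1)), float(match.group(2))

# epoch, val_loss = latest_snapshot(job_folder)   # e.g. (2, 0.001326)
```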
@@ -3,24 +3,25 @@ import datetime
 import os
 import sys
 import coloredlogs
-from classifier import train
 import uuid
-from shutil import copyfile
+from classifier import train
+from shutil import copyfile
 
-dataset = 'ped1'
-device = 'gpu1'
+dataset = 'avenue'
+device = 'cpu'
 
 job_uuid = str(uuid.uuid4())
-job_folder = os.path.join('/share/clean/{}/jobs'.format(dataset), job_uuid)
+job_folder = os.path.join('./data/clean/{}/jobs'.format(dataset), job_uuid)
 os.makedirs(job_folder)
 copyfile('config.yml', os.path.join(job_folder, 'config.yml'))
 
 log_path = os.path.join(job_folder, 'logs')
 os.makedirs(log_path, exist_ok=True)
-logging.basicConfig(filename=os.path.join(log_path,
+logging.basicConfig(filename = os.path.join(log_path,
                     "train-{}.log".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))),
-                    level=logging.DEBUG,
-                    format="%(asctime)s [%(levelname)s] %(message)s")
+                    level = logging.DEBUG,
+                    format = "%(asctime)s [%(levelname)s] %(message)s")
 
 coloredlogs.install(level=logging.INFO)
 logger = logging.getLogger()