gaurav-gireesh commented on a change in pull request #13241: [MXNET-1210 ][WIP] 
Gluon Audio
URL: https://github.com/apache/incubator-mxnet/pull/13241#discussion_r234060945
 
 

 ##########
 File path: example/gluon/urban_sounds/urban_sounds.py
 ##########
 @@ -0,0 +1,181 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+    Urban Sounds Dataset:
+
+    To be able to run this example:
+
+    1. Download the dataset(train.zip, test.zip) required for this example 
from the location:
+    **https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU**
+    2. Extract both zip archives into the **current directory**.
+       After unzipping you will get two new folders, **Train** and **Test**,
+       and two csv files, **train_csv.csv** and **test_csv.csv**.
+    3. Apache MXNet is installed on the machine. For instructions, go to the 
link:
+    **https://mxnet.incubator.apache.org/install/**
+    4. Librosa is installed. To install, follow the instructions here:
+     **https://librosa.github.io/librosa/install.html**
+
+"""
+import os
+import time
+import warnings
+import mxnet as mx
+from mxnet import gluon, nd, autograd
+from mxnet.gluon.contrib.data.audio.datasets import AudioFolderDataset
+from mxnet.gluon.contrib.data.audio.transforms import Loader, MFCC
+try:
+    import argparse
+except ImportError as er:
+    warnings.warn("Argument parsing module could not be imported and hence \
+    no arguments passed to the script can actually be parsed.")
+
+
+# Defining a neural network with number of labels
+def get_net(num_labels=10):
+    """Build and initialize the MLP used for sound classification.
+
+    Parameters
+    ----------
+    num_labels : int, default 10
+        Number of units in the output layer (one per class).
+
+    Returns
+    -------
+    gluon.nn.Sequential
+        Two 256-unit ReLU hidden layers followed by a Dense output layer
+        without activation; parameters are initialized with
+        ``mx.init.Normal(1.)``.
+    """
+    net = gluon.nn.Sequential()
+    with net.name_scope():
+        net.add(gluon.nn.Dense(256, activation="relu"))  # 1st hidden layer
+        net.add(gluon.nn.Dense(256, activation="relu"))  # 2nd hidden layer
+        # The output layer is created inside the name scope as well, so all
+        # three layers share the network's parameter-name prefix.
+        net.add(gluon.nn.Dense(num_labels))
+    net.collect_params().initialize(mx.init.Normal(1.))
+    return net
+
+
+# Defining a function to evaluate accuracy
+def evaluate_accuracy(data_iterator, net):
+    """Return the accuracy of `net` over all batches of `data_iterator`.
+
+    Parameters
+    ----------
+    data_iterator : iterable
+        Yields ``(data, label)`` pairs, one per batch.
+    net : callable
+        Model applied to each `data` batch to produce per-class scores.
+
+    Returns
+    -------
+    The value component of ``mx.metric.Accuracy().get()`` after every
+    batch has been accumulated.
+    """
+    metric = mx.metric.Accuracy()
+    for data, label in data_iterator:
+        # Index of the highest score per sample, reshaped to a column.
+        predicted = nd.argmax(net(data), axis=1).reshape((-1, 1))
+        metric.update(preds=predicted, labels=label)
+    _, value = metric.get()
+    return value
+
+
+def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
+    """Train the MLP sound classifier on a folder of audio files.
+
+    Parameters
+    ----------
+    train_dir : str
+        Folder that contains the training audio (``.wav``) files.
+    train_csv : str
+        CSV file handed to ``AudioFolderDataset`` (read with ``skip_rows=1``).
+    epochs : int, default 30
+        Number of passes over the training data.
+    batch_size : int, default 32
+        Number of samples per mini-batch served by the data loader.
+
+    Returns
+    -------
+    None
+        Progress and the final training accuracy are printed. If `train_dir`
+        is empty or does not exist, or `train_csv` is empty, a warning is
+        issued and nothing is trained.
+    """
+    if not train_dir or not os.path.exists(train_dir) or not train_csv:
+        warnings.warn("No train directory could be found ")
+        return
+    # Make a dataset from the local folder containing Audio data
+    print("\nMaking an Audio Dataset...\n")
+    tick = time.time()
+    # Use the locations supplied by the caller instead of hard-coded paths.
+    aud_dataset = AudioFolderDataset(train_dir, has_csv=True,
+                                     train_csv=train_csv,
+                                     file_format='.wav', skip_rows=1)
+    tock = time.time()
+
+    print("Loading the dataset took ", (tock-tick), " seconds.")
+    print("\n=======================================\n")
+
+    print("The synset for the dataset are: ", aud_dataset.synsets)
+    print("Seeing one item from the dataset here...\n")
+    print(aud_dataset[0])
+    print("Number of output classes = ", len(aud_dataset.synsets))
+    # Get the model to train
+    net = get_net(len(aud_dataset.synsets))
+    print("\nNeural Network = \n")
+    print(net)
+    print("\nModel - Neural Network Generated!\n")
+    print("=======================================\n")
+
+    # Define the loss - Softmax CE Loss
+    softmax_loss = gluon.loss.SoftmaxCELoss(from_logits=False,
+                                            sparse_label=True)
+    print("Loss function initialized!\n")
+    print("=======================================\n")
+
+    # Define the trainer with the optimizer
+    trainer = gluon.Trainer(net.collect_params(), 'adadelta')
+    print("Optimizer - Trainer function initialized!\n")
+    print("=======================================\n")
+
+    print("Loading the dataset to the Gluon's OOTB Dataloader...")
+
+    # Getting the data loader out of the AudioDataset and passing the transform
+    aud_transform = gluon.data.vision.transforms.Compose([Loader(), MFCC()])
+    tick = time.time()
+
+    # Honor the requested batch size rather than a fixed 32.
+    audio_train_loader = gluon.data.DataLoader(
+        aud_dataset.transform_first(aud_transform),
+        batch_size=batch_size, shuffle=True)
+    tock = time.time()
+    print("Time taken to load data and apply transform here is ",
+          (tock-tick), " seconds.")
+    print("=======================================\n")
+
+    print("Starting the training....\n")
+    # Training loop
+    tick = time.time()
+    num_examples = len(aud_dataset)
+
+    for e in range(epochs):
+        cumulative_loss = 0
+        for data, label in audio_train_loader:
+            with autograd.record():
+                output = net(data)
+                loss = softmax_loss(output, label)
+            loss.backward()
+            # Normalize by the actual number of samples in this batch; the
+            # last batch of an epoch may be smaller than `batch_size`.
+            trainer.step(data.shape[0])
+            cumulative_loss += mx.nd.sum(loss).asscalar()
+
+        # Report progress every fifth epoch only, since evaluating accuracy
+        # needs another full pass over the training data.
+        if e % 5 == 0:
+            train_accuracy = evaluate_accuracy(audio_train_loader, net)
+            print("Epoch %s. Loss: %s Train accuracy : %s " % (
+                e, cumulative_loss/num_examples, train_accuracy))
+            print("\n------------------------------\n")
+
+    train_accuracy = evaluate_accuracy(audio_train_loader, net)
+    tock = time.time()
+    print("\nFinal training accuracy: ", train_accuracy)
+
+    print("Training the sound classification for ", epochs,
+          " epochs, MLP model took ", (tock-tick), " seconds")
+    print("====================== END ======================\n")
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description="Urban Sounds clsssification 
example - MXNet")
+    parser.add_argument('--train', '-t', help="Enter the folder path that 
contains your audio files", type=str)
+    parser.add_argument('--csv', '-c', help="Enter the filename of the csv 
that contains filename\
+    to label mapping", type=str)
+    parser.add_argument('--epochs', '-e', help="Enter the number of epochs \
+    you would want to run the training for.", type=int)
+    parser.add_argument('--batch_size', '-b', help="Enter the batch_size of 
data", type=int)
+    args = parser.parse_args()
+
+    if args:
+        if args.train:
+            train_dir = args.train
+        else:
+            train_dir = './Train'
+
+        if args.csv:
+            train_csv = args.csv
+        else:
+            train_csv = './train.csv'
+
+        if args.epochs:
+            epochs = args.epochs
+        else:
+            epochs = 35
 
 Review comment:
   Yes. Thanks.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to