Neural Networks

AiDotNet provides 100+ neural network architectures for deep learning tasks. This guide covers the available layer types, pre-built architectures, and training options.

Overview

Neural networks in AiDotNet are built with a fluent API: you stack layers and configure training parameters in a single chained builder expression.

Basic Neural Network

using AiDotNet;
using AiDotNet.NeuralNetworks;

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(trainImages, trainLabels)
    .WithNeuralNetwork(nn => nn
        .AddDenseLayer(128, Activation.ReLU)
        .AddDenseLayer(64, Activation.ReLU)
        .AddOutputLayer(10, Activation.Softmax))
    .WithOptimizer(OptimizerType.Adam)
    .WithLossFunction(LossType.CrossEntropy)
    .WithEpochs(10)
    .BuildAsync();
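The builder trains the network and returns a result containing the trained model. As a sketch of what comes next (the Predict method shown here is hypothetical; consult the model API for the exact inference call):

// Hypothetical inference call; the exact method name may differ
var prediction = result.Model.Predict(testImage);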

Layer Types

Dense (Fully Connected) Layers

nn.AddDenseLayer(units: 256, activation: Activation.ReLU)
nn.AddDenseLayer(units: 128, activation: Activation.LeakyReLU, alpha: 0.01f)

Convolutional Layers

// 2D Convolution (for images)
nn.AddConv2DLayer(filters: 32, kernelSize: 3, activation: Activation.ReLU)
nn.AddConv2DLayer(filters: 64, kernelSize: (3, 3), strides: (1, 1), padding: Padding.Same)

// 1D Convolution (for sequences)
nn.AddConv1DLayer(filters: 64, kernelSize: 3)

// 3D Convolution (for video/volumetric data)
nn.AddConv3DLayer(filters: 32, kernelSize: 3)

Pooling Layers

nn.AddMaxPooling2D(poolSize: 2)
nn.AddAveragePooling2D(poolSize: 2)
nn.AddGlobalAveragePooling2D()
nn.AddGlobalMaxPooling2D()
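Global pooling collapses each feature map to a single value, so it can replace a Flatten + Dense step at the end of a convolutional stack (as in the CNN example later on this page).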

Recurrent Layers

// LSTM (returnSequences: true returns the full output sequence, required when
// stacking another recurrent layer on top; false returns only the final state)
nn.AddLSTMLayer(units: 128, returnSequences: true)
nn.AddLSTMLayer(units: 64, returnSequences: false)

// GRU
nn.AddGRULayer(units: 128)

// Bidirectional
nn.AddBidirectional(new LSTMLayer(units: 64))

Normalization Layers

nn.AddBatchNormalization()
nn.AddLayerNormalization()
nn.AddGroupNormalization(groups: 8)
nn.AddInstanceNormalization()

Regularization Layers

nn.AddDropout(rate: 0.2)
nn.AddSpatialDropout2D(rate: 0.2)
nn.AddGaussianNoise(stddev: 0.1)
nn.AddActivityRegularization(l1: 0.01, l2: 0.01)

Attention Layers

nn.AddMultiHeadAttention(numHeads: 8, keyDim: 64)
nn.AddSelfAttention(numHeads: 8)
nn.AddCrossAttention(numHeads: 8)

Utility Layers

nn.AddFlattenLayer()
nn.AddReshapeLayer(targetShape: new[] { 28, 28, 1 })
nn.AddConcatenate()  // Merge parallel branches along the feature axis
nn.AddAdd()          // Element-wise sum, e.g. for residual connections

Pre-built Architectures

ResNet

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(images, labels)
    .WithNeuralNetwork(nn => nn
        .UseResNet(variant: ResNetVariant.ResNet50, numClasses: 1000, pretrained: true))
    .BuildAsync();

Available variants: ResNet18, ResNet34, ResNet50, ResNet101, ResNet152

VGG

nn.UseVGG(variant: VGGVariant.VGG16, numClasses: 1000, pretrained: true)

Available variants: VGG11, VGG13, VGG16, VGG19

EfficientNet

nn.UseEfficientNet(variant: EfficientNetVariant.B0, numClasses: 1000)

Available variants: B0 through B7, V2_S, V2_M, V2_L

MobileNet

nn.UseMobileNet(variant: MobileNetVariant.V3Small, numClasses: 1000)

Vision Transformer (ViT)

nn.UseVisionTransformer(
    imageSize: 224,
    patchSize: 16,
    numClasses: 1000,
    embeddingDim: 768,
    depth: 12,
    numHeads: 12)
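With these settings, the 224×224 input is split into (224 / 16)² = 196 patches, each projected to a 768-dimensional embedding before passing through the 12 transformer layers.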

Transformer (for NLP)

nn.UseTransformer(
    vocabSize: 30000,
    embeddingDim: 512,
    numHeads: 8,
    numLayers: 6,
    feedForwardDim: 2048)

Building Custom Architectures

CNN for Image Classification

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(images, labels)
    .WithNeuralNetwork(nn => nn
        // Input: 224x224x3
        .AddConv2DLayer(32, 3, Activation.ReLU, padding: Padding.Same)
        .AddBatchNormalization()
        .AddMaxPooling2D(2)

        .AddConv2DLayer(64, 3, Activation.ReLU, padding: Padding.Same)
        .AddBatchNormalization()
        .AddMaxPooling2D(2)

        .AddConv2DLayer(128, 3, Activation.ReLU, padding: Padding.Same)
        .AddBatchNormalization()
        .AddMaxPooling2D(2)

        .AddGlobalAveragePooling2D()
        .AddDropout(0.5)
        .AddOutputLayer(numClasses, Activation.Softmax))
    .WithOptimizer(OptimizerType.AdamW, learningRate: 1e-4)
    .WithLossFunction(LossType.CrossEntropy)
    .WithEpochs(50)
    .ConfigureGpu(gpu => gpu.Enabled = true)
    .BuildAsync();
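Each Conv2D + BatchNormalization + MaxPooling2D block halves the spatial resolution while doubling the filter count (32 → 64 → 128), a common pattern for convolutional classifiers; global average pooling then collapses the final feature maps before the dropout and classification head.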

LSTM for Sequence Classification

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(sequences, labels)
    .WithNeuralNetwork(nn => nn
        .AddEmbeddingLayer(vocabSize: 10000, embeddingDim: 128)
        .AddLSTMLayer(128, returnSequences: true)
        .AddDropout(0.3)
        .AddLSTMLayer(64, returnSequences: false)
        .AddDropout(0.3)
        .AddDenseLayer(32, Activation.ReLU)
        .AddOutputLayer(numClasses, Activation.Softmax))
    .BuildAsync();

Transformer for Text Classification

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(tokenizedTexts, labels)
    .WithNeuralNetwork(nn => nn
        .AddEmbeddingLayer(vocabSize: 30000, embeddingDim: 256)
        .AddPositionalEncoding(maxLength: 512)
        .AddTransformerEncoderBlock(numHeads: 8, feedForwardDim: 512)
        .AddTransformerEncoderBlock(numHeads: 8, feedForwardDim: 512)
        .AddGlobalAveragePooling1D()
        .AddDenseLayer(64, Activation.ReLU)
        .AddDropout(0.1)
        .AddOutputLayer(numClasses, Activation.Softmax))
    .BuildAsync();

Activation Functions

| Activation | Usage | Description |
|------------|-------|-------------|
| ReLU | Hidden layers | max(0, x) |
| LeakyReLU | Hidden layers | max(alpha*x, x) |
| ELU | Hidden layers | Exponential linear unit |
| SELU | Self-normalizing networks | Scaled ELU |
| Swish | Modern architectures | x * sigmoid(x) |
| GELU | Transformers | Gaussian error linear unit |
| Softmax | Multi-class output | Probability distribution |
| Sigmoid | Binary output | 0-1 range |
| Tanh | RNN hidden states | -1 to 1 range |
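Activations are passed per layer. A minimal sketch using the enum values from this table (layer sizes are illustrative):

nn.AddDenseLayer(256, Activation.GELU)    // GELU: common choice in transformer-style blocks
nn.AddDenseLayer(128, Activation.Swish)   // Swish: x * sigmoid(x), used in modern architectures
nn.AddOutputLayer(1, Activation.Sigmoid)  // Sigmoid output for binary classification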

Optimizers

// Adam (default, good for most cases)
.WithOptimizer(OptimizerType.Adam, learningRate: 0.001)

// AdamW (Adam with weight decay, good for transformers)
.WithOptimizer(OptimizerType.AdamW, learningRate: 1e-4, weightDecay: 0.01)

// SGD with momentum
.WithOptimizer(OptimizerType.SGD, learningRate: 0.01, momentum: 0.9)

// RMSprop
.WithOptimizer(OptimizerType.RMSprop, learningRate: 0.001)

// Lion (memory-efficient optimizer; tracks only momentum state)
.WithOptimizer(OptimizerType.Lion, learningRate: 1e-4)

See Optimizers for the full list of 42+ optimizers.

Loss Functions

// Classification
.WithLossFunction(LossType.CrossEntropy)
.WithLossFunction(LossType.BinaryCrossEntropy)
.WithLossFunction(LossType.FocalLoss, gamma: 2.0)

// Regression
.WithLossFunction(LossType.MSE)
.WithLossFunction(LossType.MAE)
.WithLossFunction(LossType.Huber, delta: 1.0)

// Specialized
.WithLossFunction(LossType.DiceLoss)  // Segmentation
.WithLossFunction(LossType.TripletLoss)  // Embeddings

See Loss Functions for the full list of 37+ loss functions.

Training Configuration

var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(features, labels)
    .WithNeuralNetwork(nn => /* ... */)

    // Basic training
    .WithEpochs(100)
    .WithBatchSize(32)

    // Learning rate scheduling
    .WithLearningRateScheduler(LRScheduler.CosineAnnealing,
        initialLr: 1e-3, minLr: 1e-6)

    // Early stopping
    .WithEarlyStopping(patience: 10, metric: Metric.ValidationLoss)

    // Checkpointing
    .WithCheckpointing(saveEvery: 5, path: "./checkpoints")

    // Mixed precision (faster on modern GPUs)
    .WithMixedPrecision(enabled: true)

    // Gradient clipping
    .WithGradientClipping(maxNorm: 1.0)

    .BuildAsync();

Transfer Learning

// Load pretrained model and fine-tune
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
    .WithTrainingData(myImages, myLabels)
    .WithNeuralNetwork(nn => nn
        .UseResNet(ResNetVariant.ResNet50, pretrained: true)
        .FreezeLayersUpTo("layer4")  // Freeze early layers
        .ReplaceHead(new Sequential(
            new DenseLayer(256, Activation.ReLU),
            new Dropout(0.5),
            new DenseLayer(myNumClasses, Activation.Softmax))))
    .WithOptimizer(OptimizerType.Adam, learningRate: 1e-4)
    .BuildAsync();
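Freezing the early layers preserves the pretrained low-level features while the replaced head is trained from scratch; the small learning rate (1e-4) keeps fine-tuning from overwriting what the backbone has already learned.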

Model Summary

// Print model architecture
result.Model.Summary();

// Output:
// Layer (type)                 Output Shape              Param #
// ================================================================
// conv2d (Conv2D)              (None, 224, 224, 32)      896
// batch_normalization          (None, 224, 224, 32)      128
// max_pooling2d                (None, 112, 112, 32)      0
// ...
// ================================================================
// Total params: 25,557,032
// Trainable params: 25,503,912
// Non-trainable params: 53,120
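Parameter counts follow directly from layer shapes; for the first Conv2D above, (3 × 3 kernel × 3 input channels + 1 bias) × 32 filters = 896 parameters.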

Saving and Loading Models

// Save model
await result.Model.SaveAsync("./models/my_model.aidotnet");

// Load model
var loadedModel = await NeuralNetworkModel<float>.LoadAsync("./models/my_model.aidotnet");

// Save just weights
await result.Model.SaveWeightsAsync("./models/weights.bin");

// Load weights into existing architecture
model.LoadWeights("./models/weights.bin");
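Note that LoadWeights expects the current model to have exactly the same architecture as the one the weights were saved from; loading into a mismatched layer stack will fail.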

Next Steps

See the Optimizers and Loss Functions pages referenced above for the full lists of available optimizers and loss functions.