# Neural Networks
AiDotNet provides 100+ neural network architectures for deep learning tasks. This guide covers the available architectures and how to use them.
Neural networks in AiDotNet are built using a fluent API that allows you to stack layers and configure training parameters easily.

## Quick Start

```csharp
using AiDotNet;
using AiDotNet.NeuralNetworks;
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(trainImages, trainLabels)
.WithNeuralNetwork(nn => nn
.AddDenseLayer(128, Activation.ReLU)
.AddDenseLayer(64, Activation.ReLU)
.AddOutputLayer(10, Activation.Softmax))
.WithOptimizer(OptimizerType.Adam)
.WithLossFunction(LossType.CrossEntropy)
.WithEpochs(10)
.BuildAsync();
```
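Once `BuildAsync()` completes, `result.Model` holds the trained network. A minimal usage sketch follows; note that the `Predict` call and the `LoadTestImage` helper are assumptions for illustration, not confirmed API:

```csharp
// Sketch only: `Predict` is assumed to map one input tensor to one label,
// matching the builder's <float, Tensor<float>, int> type parameters.
// `LoadTestImage` stands in for your own preprocessing code.
Tensor<float> sample = LoadTestImage();
int predicted = result.Model.Predict(sample);
Console.WriteLine($"Predicted class: {predicted}");
```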
## Layer Types

### Dense Layers

```csharp
nn.AddDenseLayer(units: 256, activation: Activation.ReLU)
nn.AddDenseLayer(units: 128, activation: Activation.LeakyReLU, alpha: 0.01f)
```

### Convolutional Layers

```csharp
// 2D Convolution (for images)
nn.AddConv2DLayer(filters: 32, kernelSize: 3, activation: Activation.ReLU)
nn.AddConv2DLayer(filters: 64, kernelSize: (3, 3), strides: (1, 1), padding: Padding.Same)
// 1D Convolution (for sequences)
nn.AddConv1DLayer(filters: 64, kernelSize: 3)
// 3D Convolution (for video/volumetric data)
nn.AddConv3DLayer(filters: 32, kernelSize: 3)
```

### Pooling Layers

```csharp
nn.AddMaxPooling2D(poolSize: 2)
nn.AddAveragePooling2D(poolSize: 2)
nn.AddGlobalAveragePooling2D()
nn.AddGlobalMaxPooling2D()
```

### Recurrent Layers

```csharp
// LSTM
nn.AddLSTMLayer(units: 128, returnSequences: true)
nn.AddLSTMLayer(units: 64, returnSequences: false)
// GRU
nn.AddGRULayer(units: 128)
// Bidirectional
nn.AddBidirectional(new LSTMLayer(units: 64))
```

### Normalization Layers

```csharp
nn.AddBatchNormalization()
nn.AddLayerNormalization()
nn.AddGroupNormalization(groups: 8)
nn.AddInstanceNormalization()
```

### Regularization Layers

```csharp
nn.AddDropout(rate: 0.2)
nn.AddSpatialDropout2D(rate: 0.2)
nn.AddGaussianNoise(stddev: 0.1)
nn.AddActivityRegularization(l1: 0.01, l2: 0.01)
```

### Attention Layers

```csharp
nn.AddMultiHeadAttention(numHeads: 8, keyDim: 64)
nn.AddSelfAttention(numHeads: 8)
nn.AddCrossAttention(numHeads: 8)
```

### Utility Layers

```csharp
nn.AddFlattenLayer()
nn.AddReshapeLayer(targetShape: new[] { 28, 28, 1 })
nn.AddConcatenate()
nn.AddAdd() // Residual connection
```
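`Add` and `Concatenate` are merge layers: they combine the output of the preceding layer with one or more other tensors, such as a block's original input, which is how residual (skip) connections are formed. If you want residual networks without wiring the skip connections yourself, use the pre-built architectures below.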
## Pre-built Architectures

### ResNet

```csharp
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(images, labels)
.WithNeuralNetwork(nn => nn
.UseResNet(variant: ResNetVariant.ResNet50, numClasses: 1000, pretrained: true))
.BuildAsync();
```

Available variants: ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
### VGG

```csharp
nn.UseVGG(variant: VGGVariant.VGG16, numClasses: 1000, pretrained: true)
```

Available variants: VGG11, VGG13, VGG16, VGG19
### EfficientNet

```csharp
nn.UseEfficientNet(variant: EfficientNetVariant.B0, numClasses: 1000)
```

Available variants: B0 through B7, V2_S, V2_M, V2_L
### MobileNet

```csharp
nn.UseMobileNet(variant: MobileNetVariant.V3Small, numClasses: 1000)
```

### Vision Transformer (ViT)

```csharp
nn.UseVisionTransformer(
imageSize: 224,
patchSize: 16,
numClasses: 1000,
embeddingDim: 768,
depth: 12,
numHeads: 12)
```
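With these settings the 224×224 input is split into (224 / 16)² = 196 patches, each linearly projected to a 768-dimensional embedding, so the 12 encoder layers operate on a sequence of 196 patch tokens (plus any class token).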
### Transformer

```csharp
nn.UseTransformer(
vocabSize: 30000,
embeddingDim: 512,
numHeads: 8,
numLayers: 6,
feedForwardDim: 2048)
```

## Complete Examples

### Image Classification (CNN)

```csharp
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(images, labels)
.WithNeuralNetwork(nn => nn
// Input: 224x224x3
.AddConv2DLayer(32, 3, Activation.ReLU, padding: Padding.Same)
.AddBatchNormalization()
.AddMaxPooling2D(2)
.AddConv2DLayer(64, 3, Activation.ReLU, padding: Padding.Same)
.AddBatchNormalization()
.AddMaxPooling2D(2)
.AddConv2DLayer(128, 3, Activation.ReLU, padding: Padding.Same)
.AddBatchNormalization()
.AddMaxPooling2D(2)
.AddGlobalAveragePooling2D()
.AddDropout(0.5)
.AddOutputLayer(numClasses, Activation.Softmax))
.WithOptimizer(OptimizerType.AdamW, learningRate: 1e-4)
.WithLossFunction(LossType.CrossEntropy)
.WithEpochs(50)
.ConfigureGpu(gpu => gpu.Enabled = true)
.BuildAsync();
```
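Note the `GlobalAveragePooling2D` before the classifier head: it collapses each of the 128 final feature maps to a single value, giving the dropout and output layers a compact 128-dimensional input instead of a large flattened spatial grid.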
### Text Classification (LSTM)

```csharp
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(sequences, labels)
.WithNeuralNetwork(nn => nn
.AddEmbeddingLayer(vocabSize: 10000, embeddingDim: 128)
.AddLSTMLayer(128, returnSequences: true)
.AddDropout(0.3)
.AddLSTMLayer(64, returnSequences: false)
.AddDropout(0.3)
.AddDenseLayer(32, Activation.ReLU)
.AddOutputLayer(numClasses, Activation.Softmax))
.BuildAsync();
```
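The `returnSequences` flags matter when stacking recurrent layers: the first LSTM returns its full output sequence so the second LSTM receives one vector per timestep, while the second returns only its final hidden state, which is what the dense classifier consumes.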
### Text Classification (Transformer)

```csharp
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(tokenizedTexts, labels)
.WithNeuralNetwork(nn => nn
.AddEmbeddingLayer(vocabSize: 30000, embeddingDim: 256)
.AddPositionalEncoding(maxLength: 512)
.AddTransformerEncoderBlock(numHeads: 8, feedForwardDim: 512)
.AddTransformerEncoderBlock(numHeads: 8, feedForwardDim: 512)
.AddGlobalAveragePooling1D()
.AddDenseLayer(64, Activation.ReLU)
.AddDropout(0.1)
.AddOutputLayer(numClasses, Activation.Softmax))
.BuildAsync();
```

## Activation Functions

| Activation | Usage | Description |
|---|---|---|
| ReLU | Hidden layers | max(0, x) |
| LeakyReLU | Hidden layers | max(alpha*x, x) |
| ELU | Hidden layers | Exponential linear unit |
| SELU | Self-normalizing networks | Scaled ELU |
| Swish | Modern architectures | x * sigmoid(x) |
| GELU | Transformers | Gaussian error linear unit |
| Softmax | Multi-class output | Probability distribution |
| Sigmoid | Binary output | 0-1 range |
| Tanh | RNN hidden states | -1 to 1 range |
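As a quick illustration of putting the table to use, here is a sketch reusing the fluent calls shown above (layer sizes are arbitrary, not a recommendation):

```csharp
// SELU suits self-normalizing dense stacks; Sigmoid suits binary output.
.WithNeuralNetwork(nn => nn
    .AddDenseLayer(units: 256, activation: Activation.SELU)
    .AddDenseLayer(units: 128, activation: Activation.SELU)
    .AddOutputLayer(1, Activation.Sigmoid))
```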
## Optimizers

```csharp
// Adam (default, good for most cases)
.WithOptimizer(OptimizerType.Adam, learningRate: 0.001)
// AdamW (Adam with weight decay, good for transformers)
.WithOptimizer(OptimizerType.AdamW, learningRate: 1e-4, weightDecay: 0.01)
// SGD with momentum
.WithOptimizer(OptimizerType.SGD, learningRate: 0.01, momentum: 0.9)
// RMSprop
.WithOptimizer(OptimizerType.RMSprop, learningRate: 0.001)
// Lion (efficient optimizer)
.WithOptimizer(OptimizerType.Lion, learningRate: 1e-4)
```

See Optimizers for the full list of 42+ optimizers.
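Optimizers combine naturally with the learning-rate schedulers shown under Training Configuration below; for example, AdamW with cosine annealing, a common recipe for transformer training (a sketch assembled from calls documented on this page):

```csharp
// AdamW decouples weight decay from the gradient update; cosine annealing
// decays the learning rate smoothly from 1e-3 down to 1e-6.
.WithOptimizer(OptimizerType.AdamW, learningRate: 1e-3, weightDecay: 0.01)
.WithLearningRateScheduler(LRScheduler.CosineAnnealing,
    initialLr: 1e-3, minLr: 1e-6)
```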
## Loss Functions

```csharp
// Classification
.WithLossFunction(LossType.CrossEntropy)
.WithLossFunction(LossType.BinaryCrossEntropy)
.WithLossFunction(LossType.FocalLoss, gamma: 2.0)
// Regression
.WithLossFunction(LossType.MSE)
.WithLossFunction(LossType.MAE)
.WithLossFunction(LossType.Huber, delta: 1.0)
// Specialized
.WithLossFunction(LossType.DiceLoss) // Segmentation
.WithLossFunction(LossType.TripletLoss) // Embeddings
```

See Loss Functions for the full list of 37+ loss functions.

## Training Configuration

```csharp
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(features, labels)
.WithNeuralNetwork(nn => /* ... */)
// Basic training
.WithEpochs(100)
.WithBatchSize(32)
// Learning rate scheduling
.WithLearningRateScheduler(LRScheduler.CosineAnnealing,
initialLr: 1e-3, minLr: 1e-6)
// Early stopping
.WithEarlyStopping(patience: 10, metric: Metric.ValidationLoss)
// Checkpointing
.WithCheckpointing(saveEvery: 5, path: "./checkpoints")
// Mixed precision (faster on modern GPUs)
.WithMixedPrecision(enabled: true)
// Gradient clipping
.WithGradientClipping(maxNorm: 1.0)
.BuildAsync();
```

## Transfer Learning

```csharp
// Load pretrained model and fine-tune
var result = await new PredictionModelBuilder<float, Tensor<float>, int>()
.WithTrainingData(myImages, myLabels)
.WithNeuralNetwork(nn => nn
.UseResNet(ResNetVariant.ResNet50, pretrained: true)
.FreezeLayersUpTo("layer4") // Freeze early layers
.ReplaceHead(new Sequential(
new DenseLayer(256, Activation.ReLU),
new Dropout(0.5),
new DenseLayer(myNumClasses, Activation.Softmax))))
.WithOptimizer(OptimizerType.Adam, learningRate: 1e-4)
.BuildAsync();
```
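Freezing everything up to `layer4` keeps the pretrained feature extractor fixed, so only the replacement head receives gradient updates; the low 1e-4 learning rate also protects the pretrained weights if you later unfreeze layers for a second fine-tuning pass.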
## Model Summary

```csharp
// Print model architecture
result.Model.Summary();
// Output:
// Layer (type) Output Shape Param #
// ================================================================
// conv2d (Conv2D) (None, 224, 224, 32) 896
// batch_normalization (None, 224, 224, 32) 128
// max_pooling2d (None, 112, 112, 32) 0
// ...
// ================================================================
// Total params: 25,557,032
// Trainable params: 25,503,912
// Non-trainable params: 53,120
```

## Saving and Loading

```csharp
// Save model
await result.Model.SaveAsync("./models/my_model.aidotnet");
// Load model
var loadedModel = await NeuralNetworkModel<float>.LoadAsync("./models/my_model.aidotnet");
// Save just weights
await result.Model.SaveWeightsAsync("./models/weights.bin");
// Load weights into existing architecture
model.LoadWeights("./models/weights.bin");
```
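A loaded model can be used for inference immediately. As in the Quick Start sketch, `Predict` and `LoadTestImage` are assumptions for illustration, not confirmed API:

```csharp
// Sketch only: `Predict` is an assumed inference method on the loaded model;
// `LoadTestImage` stands in for your own preprocessing code.
Tensor<float> sample = LoadTestImage();
int predicted = loadedModel.Predict(sample);
Console.WriteLine($"Predicted class: {predicted}");
```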
## See Also

- Computer Vision - Image-specific architectures
- NLP - Text processing networks
- Optimizers - Full optimizer reference
- Loss Functions - Full loss function reference
- Distributed Training - Multi-GPU training