Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: CI for Windows Project

# Trigger: every push to the main branch.
on:
  push:
    branches:
      - main

jobs:
  build-and-test:
    runs-on: windows-latest # Use Windows runner

    # Steps without an explicit `shell:` run under the runner's default shell.
    steps:
      - name: Checkout Repository
        # v4 replaces v3, which runs on a deprecated Node runtime.
        uses: actions/checkout@v4

      - name: Set up Python 3.12.4
        # v5 replaces v4 for the same reason as the checkout action above.
        uses: actions/setup-python@v5
        with:
          python-version: '3.12.4'

      - name: Install SQLite CLI
        run: |
          echo "Installing SQLite..."
          choco install sqlite --no-progress
          echo "SQLite version:"
          sqlite3 --version

      - name: Check Python and SQLite Versions
        run: |
          python --version
          python -c "import sqlite3; print(f'SQLite version: {sqlite3.sqlite_version}')"

      - name: Install Dependencies
        # `python -m pip` guarantees the pip of the interpreter selected above.
        run: |
          python -m pip install --upgrade pip
          python -m pip install pandas requests
          if (Test-Path -Path requirements.txt) { python -m pip install -r requirements.txt }

      - name: List Project Directory
        run: |
          echo "Project directory structure:"
          dir project/

      - name: Run Tests
        # Run under bash directly instead of spawning bash from the default
        # shell, so any failing command fails the step.
        shell: bash
        run: |
          echo "Setting tests.sh as executable..."
          chmod +x project/tests.sh
          echo "Running tests..."
          bash project/tests.sh
Binary file added exercises/country-stats.sqlite
Binary file not shown.
47 changes: 47 additions & 0 deletions exercises/exercise1.jv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//Building an automated data pipeline

// Pipeline that processes the airports dataset
pipeline AirportsPipeline {

    // Data flow: HTTP download -> text file -> CSV sheet -> typed table -> SQLite
    AirportsExtractor
        -> AirportsTextFileInterpreter
        -> AirportsCSVInterpreter
        -> AirportsTableInterpreter
        -> AirportsLoader;

    // Downloads the CSV export from the given URL
    block AirportsExtractor oftype HttpExtractor {
        url: "https://opendata.rhein-kreis-neuss.de/api/explore/v2.1/catalog/datasets/rhein-kreis-neuss-flughafen-weltweit/exports/csv?lang=en&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B";
    }

    // Treats the downloaded file as a text file
    block AirportsTextFileInterpreter oftype TextFileInterpreter { }

    // Parses the text as CSV, splitting on ';', and outputs a sheet
    block AirportsCSVInterpreter oftype CSVInterpreter {
        delimiter: ";";
    }

    // Turns the sheet into a table containing only the columns listed here;
    // Zeitzone, DST, Zeitzonen-Datenbank and geo_punkt are not listed and are
    // therefore left out
    block AirportsTableInterpreter oftype TableInterpreter {
        header: true;
        columns: [
            "Lfd. Nummer" oftype integer,
            "Name des Flughafens" oftype text,
            "Ort" oftype text,
            "Land" oftype text,
            "IATA" oftype text,
            "ICAO" oftype text,
            "Latitude" oftype decimal,
            "Longitude" oftype decimal,
            "Altitude" oftype integer
        ];
    }

    // Writes the table "airports" into the SQLite file airports.sqlite
    block AirportsLoader oftype SQLiteLoader {
        table: "airports";
        file: "airports.sqlite";
    }
}
82 changes: 82 additions & 0 deletions exercises/exercise2.jv
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@

// Automated data pipeline that extracts, processes, and stores the tree-planting data for autumn (Herbst) 2023

pipeline HerbstPflanzungDatenPipeline {

    // Processing order, from extraction to loading
    HerbstPflanzungDatenExtractor
        -> HerbstPflanzungDatenTextInterpreter
        -> HerbstPflanzungDatenCSVInterpreter
        -> HerbstPflanzungDatenColumnDeleter
        -> HerbstPflanzungDatenValidFilter
        -> HerbstPflanzungDatenSQLiteLoader;

    // Downloads the CSV export of the dataset
    block HerbstPflanzungDatenExtractor oftype HttpExtractor {
        url: "https://opendata.rhein-kreis-neuss.de/api/v2/catalog/datasets/stadt-neuss-herbstpflanzung-2023/exports/csv";
    }

    // Interprets the downloaded file as a text file
    block HerbstPflanzungDatenTextInterpreter oftype TextFileInterpreter {}

    // Parses the text as CSV with ';' as the delimiter
    block HerbstPflanzungDatenCSVInterpreter oftype CSVInterpreter {
        delimiter: ';';
    }

    // Drops column E (the "baumart_deutsch" column), which is not needed downstream
    block HerbstPflanzungDatenColumnDeleter oftype ColumnDeleter {
        delete: [column E];
    }

    // Builds the table with the expected column types; the custom value types
    // below constrain the "stadtteil" and "id" columns
    block HerbstPflanzungDatenValidFilter oftype TableInterpreter {
        header: true;
        columns: [
            "lfd_nr" oftype integer,
            "stadtteil" oftype StadtteilVogelsang,
            "standort" oftype text,
            "baumart_botanisch" oftype text,
            "id" oftype GeoPattern,
            "baumfamilie" oftype text
        ];
    }

    // Stores the result as table 'trees' in trees.sqlite
    block HerbstPflanzungDatenSQLiteLoader oftype SQLiteLoader {
        table: "trees";
        file: "trees.sqlite";
    }

    // Text value type for the 'stadtteil' column, restricted by the Vogelsang constraint
    valuetype StadtteilVogelsang oftype text {
        constraints: [Vogelsang];
    }

    // Requires the value to start with 'Vogelsang'
    constraint Vogelsang oftype RegexConstraint {
        regex: /^Vogelsang.*/;
    }

    // Text value type for the 'id' column, restricted by the IDGeoPattern constraint
    valuetype GeoPattern oftype text {
        constraints: [IDGeoPattern];
    }

    // Requires two coordinates separated by ", ", each made of 1-3 digits,
    // a dot, and one or more digits
    constraint IDGeoPattern oftype RegexConstraint {
        regex: /^\d{1,3}\.\d+, \d{1,3}\.\d+$/;
    }
}
93 changes: 93 additions & 0 deletions exercises/exercise3.jv
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
pipeline EconomicMetricsPipeline {

    // Main chain: download, pick the sheet, cut out the cell range, rename the
    // headers, then parse and store the GDP table
    DataDownloader
        -> FileInterpreter
        -> TargetSheetSelector
        -> CellRangeExtractor
        -> HeaderRenamer
        -> GDPDataParser
        -> GDPSaver;

    // Second branch from the renamed sheet: parse and store the bond table
    HeaderRenamer
        -> BondDataParser
        -> BondSaver;

    // Fetches the XLSX workbook over HTTP
    block DataDownloader oftype HttpExtractor {
        url: 'https://thedocs.worldbank.org/en/doc/7d852628d96b9411d43e5d36d5dff941-0050062022/original/Graphs-Chapter-5-02082022.xlsx';
        retries: 3;
    }

    // Interprets the downloaded file as an XLSX workbook
    block FileInterpreter oftype XLSXInterpreter { }

    // Picks the sheet to process
    block TargetSheetSelector oftype SheetPicker {
        sheetName: "Figure S5.1.2";
    }

    // Keeps only cells P2:S45 of that sheet
    block CellRangeExtractor oftype CellRangeSelector {
        select: range P2:S45;
    }

    // Overwrites the four header cells of the selected range with clear names
    block HeaderRenamer oftype CellWriter {
        at: range A1:D1;
        write: [
            'Country Code',
            'Economy',
            'GDP per Capita',
            'Bond Issuance Share'
        ];
    }

    // Table holding the country code and the GDP per capita
    block GDPDataParser oftype TableInterpreter {
        header: true;
        columns: [
            'Country Code' oftype CountryCodeAlpha3,
            'GDP per Capita' oftype nonNegativeDecimal
        ];
    }

    // Table holding the country code and the bond issuance share
    block BondDataParser oftype TableInterpreter {
        header: true;
        columns: [
            'Country Code' oftype CountryCodeAlpha3,
            'Bond Issuance Share' oftype decimalBetweenZeroAndOne
        ];
    }

    // Writes the GDP table to country-stats.sqlite
    block GDPSaver oftype SQLiteLoader {
        table: 'gdpPerCapita';
        file: 'country-stats.sqlite';
    }

    // Writes the bond table to the same SQLite file
    block BondSaver oftype SQLiteLoader {
        table: 'bondIssuance';
        file: 'country-stats.sqlite';
    }

    // Decimal with a lower bound of 0 and no upper bound
    valuetype nonNegativeDecimal oftype decimal {
        constraints: [GreaterThanOrEqualToZero];
    }

    constraint GreaterThanOrEqualToZero oftype RangeConstraint {
        lowerBound: 0;
    }

    // Decimal bounded by 0 and 1
    valuetype decimalBetweenZeroAndOne oftype decimal {
        constraints: [WithinZeroToOne];
    }

    constraint WithinZeroToOne oftype RangeConstraint {
        lowerBound: 0;
        upperBound: 1;
    }
}
108 changes: 108 additions & 0 deletions exercises/exercise4.jv
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Defines the pipeline named "temperaturePipeline" to process the temperature data
pipeline temperaturePipeline {

    // Processing order: download -> unzip -> pick file -> text -> CSV ->
    // rename headers -> typed table -> Celsius-to-Fahrenheit transforms -> SQLite
    tempExtractor
        -> tempZipInterpreter
        -> tempFilePicker
        -> tempTextFileInterpreter
        -> tempCSVInterpreter
        -> headerRename
        -> batteryHeaderRename
        -> tempTableInterpreter
        -> tempToFahrenheit
        -> batteryTempToFahrenheit
        -> tempLoader;

    // Downloads the dataset from the specified URL as a ZIP file
    block tempExtractor oftype HttpExtractor {
        url: "https://www.mowesta.com/data/measure/mowesta-dataset-20221107.zip";
    }

    // Interprets the downloaded file as a ZIP archive
    block tempZipInterpreter oftype ArchiveInterpreter {
        archiveType: "zip";
    }

    // Picks the file data.csv from the extracted archive
    block tempFilePicker oftype FilePicker {
        path: '/data.csv';
    }

    // Reads the selected file as text
    block tempTextFileInterpreter oftype TextFileInterpreter {}

    // Parses the text as CSV, using ';' as the delimiter
    block tempCSVInterpreter oftype CSVInterpreter {
        delimiter: ";";
    }

    // Overwrites the first five header cells (A1:E1) with English names
    block headerRename oftype CellWriter {
        at: range A1:E1;
        write: ["id", "producer", "model", "month", "temperature"];
    }

    // Overwrites header cell J1 with "battery_temperature"
    block batteryHeaderRename oftype CellWriter {
        at: cell J1;
        write: ["battery_temperature"];
    }

    // Interprets the sheet as a table with the listed columns and types;
    // "id" and "month" use the constrained value types defined below
    block tempTableInterpreter oftype TableInterpreter {
        header: true;
        columns: [
            "id" oftype PositiveInteger,
            "producer" oftype text,
            "model" oftype text,
            "month" oftype MonthType,
            "temperature" oftype decimal,
            "battery_temperature" oftype decimal
        ];
    }

    // Converts a Celsius value to Fahrenheit: F = C * 9/5 + 32
    transform CtoF {
        from Celsius oftype decimal;
        to Fahrenheit oftype decimal;
        Fahrenheit: (Celsius * 9/5) + 32;
    }

    // Converts the "temperature" column in place from Celsius to Fahrenheit
    block tempToFahrenheit oftype TableTransformer {
        inputColumns: ['temperature'];
        outputColumn: 'temperature';
        uses: CtoF;
    }

    // Converts the "battery_temperature" column in place from Celsius to Fahrenheit
    block batteryTempToFahrenheit oftype TableTransformer {
        inputColumns: ['battery_temperature'];
        outputColumn: 'battery_temperature';
        uses: CtoF;
    }

    // Loads the processed table into an SQLite database
    block tempLoader oftype SQLiteLoader {
        table: 'temperatures';
        file: 'temperatures.sqlite';
    }

    // Constraint ensuring integer values are strictly positive.
    // Zero is rejected so the constraint matches its name.
    constraint PositiveConstraint on integer:
        value > 0;

    // Custom integer type that only admits strictly positive values
    valuetype PositiveInteger oftype integer {
        constraints: [PositiveConstraint];
    }

    // Constraint ensuring month values are within the valid range (1-12)
    constraint MonthConstraint on integer:
        value >= 1 and value <= 12;

    // Custom integer type for valid month values
    valuetype MonthType oftype integer {
        constraints: [MonthConstraint];
    }
}
Loading