diff --git a/pydata-eindhoven-2023/category.json b/pydata-eindhoven-2023/category.json new file mode 100644 index 000000000..0d521c633 --- /dev/null +++ b/pydata-eindhoven-2023/category.json @@ -0,0 +1,3 @@ +{ + "title": "PyData Eindhoven 2023" +} diff --git a/pydata-eindhoven-2023/videos/adrian-boguszewski-beyond-the-continuum-the-importance-of-quantization-in-deep-learning.json b/pydata-eindhoven-2023/videos/adrian-boguszewski-beyond-the-continuum-the-importance-of-quantization-in-deep-learning.json new file mode 100644 index 000000000..be38814c0 --- /dev/null +++ b/pydata-eindhoven-2023/videos/adrian-boguszewski-beyond-the-continuum-the-importance-of-quantization-in-deep-learning.json @@ -0,0 +1,24 @@ +{ + "description": "Quantization is a process of mapping continuous values to a finite set of discrete values. It is a powerful technique that can significantly reduce the memory footprint and computational requirements of deep learning models, making them more efficient and easier to deploy on resource-constrained devices. In this talk, we will explore the different types of quantization techniques and discuss how they can be applied to deep learning models. 
In addition, we will cover the basics of NNCF and OpenVINO Toolkit, seeing how they collaborate to achieve outstanding performance - everything in a Jupyter Notebook, which allows you to try it at home.", + "duration": 2037, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Adrian Boguszewski" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ZHE0a7kSMuw/maxresdefault.jpg", + "title": "Beyond the Continuum: The Importance of Quantization in Deep Learning", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZHE0a7kSMuw" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/alyona-galyeva-dr-sebastian-werner-5-ways-to-fail-with-time-series.json b/pydata-eindhoven-2023/videos/alyona-galyeva-dr-sebastian-werner-5-ways-to-fail-with-time-series.json new file mode 100644 index 000000000..bed971c27 --- /dev/null +++ b/pydata-eindhoven-2023/videos/alyona-galyeva-dr-sebastian-werner-5-ways-to-fail-with-time-series.json @@ -0,0 +1,25 @@ +{ + "description": "Time series data is ubiquitous in our world today: from sensor data collected by the Internet of Things (IoT) to sensors in manufacturing equipment, from financial records to weather data. As the amount of time series data we collect continues to grow, so too do the challenges of analyzing and solving complex business questions with it.\nTime series pose some quite unique challenges that can hit you very hard when you are trying to deploy it to productive use. 
In this presentation we will highlight some of the most common pitfalls we are recurrently seeing in the field - to spoiler some: time zones, denoising or re-training challenges.\nIn addition, we will also discuss some of the specifics of doing data science with this type of data at scale (and in production).", + "duration": 2043, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Alyona Galyeva", + "Sebastian Werner" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/5jOW6baXYI4/maxresdefault.jpg", + "title": "5 ways to fail with time series", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=5jOW6baXYI4" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/ariadna-kramkovska-background-removal-without-background-knowledge.json b/pydata-eindhoven-2023/videos/ariadna-kramkovska-background-removal-without-background-knowledge.json new file mode 100644 index 000000000..1cf392665 --- /dev/null +++ b/pydata-eindhoven-2023/videos/ariadna-kramkovska-background-removal-without-background-knowledge.json @@ -0,0 +1,24 @@ +{ + "description": "Background removal is a process in image editing and computer vision where the main subject of an image is isolated from its background. Among many spheres where it is applied, product design is one where it is heavily relied upon.\nThe talk is catered primarily towards data scientists and Python enthusiasts who would love to know more about practical computer vision problems. 
During this presentation, you will learn about our approach to how background removal was tackled in a print-on-demand company with no ground truth data, see how initial research can be done (even without a pricey GPU-based instance), get to see iterative improvements in the results and understand how eventually domain-centric training results can compare to external providers.", + "duration": 1661, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Ariadna Kramkovska" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/YpxZKE2aHHo/maxresdefault.jpg", + "title": "Background removal without background knowledge", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YpxZKE2aHHo" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/bart-steverink-laurens-schinkelshoek.json b/pydata-eindhoven-2023/videos/bart-steverink-laurens-schinkelshoek.json new file mode 100644 index 000000000..674a3d956 --- /dev/null +++ b/pydata-eindhoven-2023/videos/bart-steverink-laurens-schinkelshoek.json @@ -0,0 +1,25 @@ +{ + "description": "Healthplus.ai is developing PERISCOPE\u00a9 as a decision support system for surgical departments. With PERISCOPE we train a machine learning model on data in the electronic health records. 
In this talk we will show you our mission, the design choices we have made and the resulting architecture as well as shed some light on some of the challenges we face.", + "duration": 1756, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Bart Steverink", + "Laurens Schinkelshoek" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/pXrMFvNbywA/maxresdefault.jpg", + "title": "Healthplus.ai: machine learning for medical decision support systems", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pXrMFvNbywA" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/bart-van-erp-brain-inspired-natural-ai-unlocking-intelligence.json b/pydata-eindhoven-2023/videos/bart-van-erp-brain-inspired-natural-ai-unlocking-intelligence.json new file mode 100644 index 000000000..fcd4f2f83 --- /dev/null +++ b/pydata-eindhoven-2023/videos/bart-van-erp-brain-inspired-natural-ai-unlocking-intelligence.json @@ -0,0 +1,24 @@ +{ + "description": "Natural AI aspires to create intelligent agents that draw inspiration from the human brain. This paradigm merges cognitive science with the Free Energy Principle to develop adaptable and human-like AI systems. 
These low-power agents excel in understanding context, learning from experience, and intuitive interactions, promising a future of energy-efficient, intelligent problem-solving.", + "duration": 1490, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Bart van Erp" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/whvezTvNy5Q/maxresdefault.jpg", + "title": "Brain-Inspired Natural AI: Unlocking Intelligence", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=whvezTvNy5Q" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/chris-laffra-pyscript-python-in-the-browser.json b/pydata-eindhoven-2023/videos/chris-laffra-pyscript-python-in-the-browser.json new file mode 100644 index 000000000..18ed68e51 --- /dev/null +++ b/pydata-eindhoven-2023/videos/chris-laffra-pyscript-python-in-the-browser.json @@ -0,0 +1,24 @@ +{ + "description": "This talk will introduce PyScript \u2013 a framework that enables rich Python applications in the browser. PyScript aims to give users a first-class programming language that has consistent styling rules, is more expressive, and is easier to learn than JavaScript. 
This talk will give demos and show how to write and host PyScript applications.", + "duration": 1980, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Chris Laffra" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/5nseG-iU62g/maxresdefault.jpg", + "title": "PyScript - Python in the browser", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=5nseG-iU62g" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/dorian-van-den-heede-your-best-bet-effortless-mlops-with-python-models-in-dbt.json b/pydata-eindhoven-2023/videos/dorian-van-den-heede-your-best-bet-effortless-mlops-with-python-models-in-dbt.json new file mode 100644 index 000000000..d06e6c1e4 --- /dev/null +++ b/pydata-eindhoven-2023/videos/dorian-van-den-heede-your-best-bet-effortless-mlops-with-python-models-in-dbt.json @@ -0,0 +1,24 @@ +{ + "description": "In just a few short years, dbt has taken the data engineering world by storm, becoming the de facto standard for data transformation pipelines. Its primary strength, which endures, is SQL, a ubiquitous programming language shared by data analysts, scientists, and engineers.\n\nHowever, there are limitations to what SQL can achieve. With the introduction of Python models since version 1.3, data pipelines in dbt have become significantly more expressive, to the extent that you can implement and orchestrate entire batch machine learning pipelines. 
By harnessing the inherent power of dbt-core, classic SQL, and the richness of Python and its ecosystem, you can finally create a machine learning workflow that is accessible to everyone on your team.\n\nIn this talk, we will delve into the dos and don'ts of using Python models in dbt, illustrated through an exemplary daily-running machine learning pipeline aimed at beating football odds provided by bookmakers, utilizing the European Soccer Dataset. By the end of this session, you will have a firm grasp of key design patterns for a successful machine learning project within dbt.\n\nPrerequisites:\n- Required: python, SQL, a base understanding of ML\n- Optional: dbt, JinJa, MLOps concepts", + "duration": 1979, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Dorian Van den Heede" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/AdY7xyj7a28/maxresdefault.jpg", + "title": "Your best Bet: Effortless MLOps with Python Models in dbt", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=AdY7xyj7a28" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/erdem-b-bouke-h-enhancing-quality-control-efficiency-a-dynamic-risk-threshold-approach.json b/pydata-eindhoven-2023/videos/erdem-b-bouke-h-enhancing-quality-control-efficiency-a-dynamic-risk-threshold-approach.json new file mode 100644 index 000000000..f6bf85760 --- /dev/null +++ b/pydata-eindhoven-2023/videos/erdem-b-bouke-h-enhancing-quality-control-efficiency-a-dynamic-risk-threshold-approach.json @@ -0,0 +1,25 @@ +{ + "description": "The challenge we faced was in determining when a supply item is faulty and should be sent for quality control inspection. With a daily influx of around 15,000 supply items and a quality control team capable of inspecting only 200 items due to extensive manual labor requirements. 
Random checking was not a viable option, and we sought to leverage data science to preemptively identify the most likely faulty items.\n\nWhat makes this project particularly captivating is the delicate balance it strikes between forecasting the expected volume of supply items and predicting the risk scores for each item. This synergy between volume forecasting and risk prediction enabled us to establish a dynamic risk threshold, a pivotal factor in determining whether a supply item should undergo quality control inspection at that moment in time. The dynamic nature of this threshold accommodated the ever-changing landscape of supply volumes, ensuring that it adapted to meet the evolving needs of our organization.\n\nWe will first introduce the problem and business requirements, then explain our solution, and finally show an overview of techniques and frameworks used (Delta Unity Catalog tables, Airflow, PySpark, MLflow model registry, HyperOpt model optimization, Databricks serving endpoint).\n\nThe key takeaways for our audience:\nUnderstand the importance and method of dynamically adjusting risk thresholds in quality control.\nLearn the benefits of using a multi modal approach in supply chain management systems.\n\nExpected background knowledge:\nAttendees should have a basic understanding of predictive modeling and data science concepts. 
Familiarity with supply chain management or quality control processes is beneficial but not mandatory.\n\nAudience:\nThis talk is designed for data- scientists/engineers, supply chain professionals, quality control teams, and anyone interested in the application of data science in real-world challenges.", + "duration": 1730, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Erdem Başeğmez", + "Bouke Hendriks" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/QuoZN4kl63I/maxresdefault.jpg", + "title": "Enhancing Quality Control Efficiency: A Dynamic Risk Threshold Approach", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=QuoZN4kl63I" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/inge-van-den-ende-leveraging-conformal-prediction-for-calibrated-probabilistic-time-series-forecasts.json b/pydata-eindhoven-2023/videos/inge-van-den-ende-leveraging-conformal-prediction-for-calibrated-probabilistic-time-series-forecasts.json new file mode 100644 index 000000000..c4117f204 --- /dev/null +++ b/pydata-eindhoven-2023/videos/inge-van-den-ende-leveraging-conformal-prediction-for-calibrated-probabilistic-time-series-forecasts.json @@ -0,0 +1,24 @@ +{ + "description": "With the increasing amount of volatile renewable energy sources, it becomes more and more challenging to keep the electrical grid in balance. Probabilistic energy price forecasts can help to create this balance. But how do we obtain well-calibrated forecasts? Conformal prediction is a machine learning framework that can produce prediction regions for any underlying point estimator, assuming only the exchangeability of the data. The advantage is that these prediction intervals have valid coverage in finite samples without distributional assumptions beforehand. 
Valid coverage means that the prediction intervals align with the distribution of the data set, which is not the case for all methods that give prediction intervals. To guarantee valid coverage, a validation set is used, which will be explained during the talk.\n\nHowever, a disadvantage of the prediction intervals is that they only weakly adapt to the input space. Where most machine learning predictions are based on a large number of input features, the prediction intervals don\u2019t take into account the local variability of this input space. To create a specific probabilistic forecast with valid coverage, conformal prediction can be used to calibrate probabilistic forecasts. Conformalized quantile regression is a common method for this.\n\nThis talk will explain the basics of probabilistic time series forecasting, multiple probabilistic model techniques, and calibration with conformal prediction. Let\u2019s accelerate the renewable energy transition with calibrated probabilistic forecasts!\n\nAre you a data scientist or machine learning engineer that is or wants to start building probabilistic forecasts? 
This talk is for you.", + "duration": 1892, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Inge van den Ende" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/--WcrDRtrYk/maxresdefault.jpg", + "title": "Leveraging conformal prediction for calibrated probabilistic time series forecasts", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=--WcrDRtrYk" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/ionut-barbu-tim-brakenhoff-how-to-build-production-ready-data-science-pipelines-with-kedro.json b/pydata-eindhoven-2023/videos/ionut-barbu-tim-brakenhoff-how-to-build-production-ready-data-science-pipelines-with-kedro.json new file mode 100644 index 000000000..6e00e40f8 --- /dev/null +++ b/pydata-eindhoven-2023/videos/ionut-barbu-tim-brakenhoff-how-to-build-production-ready-data-science-pipelines-with-kedro.json @@ -0,0 +1,25 @@ +{ + "description": "You have just received great news. You are about to join a new exciting project. The client wants you to deliver a maintainable, reproducible and production-ready data science pipeline.\n\nHow do you best start? Because\u2026 It takes really long to put code in production and we have to rewrite and restructure large parts of it\u2026 I have to think about Sphinx, black, Cookiecutter Data Science, Docker, Python Logging, virtual environments, Pytest, and more\u2026 People on my team all have different levels of exposure to software engineering best-practice.\n\nIn this talk we demonstrate how we benefit from Kedro to tackle these hurdles of creating machine learning products. Kedro is an open-source Python framework for creating reproducible, maintainable and modular data science code \u2013 code that easily transitions into production. 
We will focus on the Kedro project structure, the decoupling between the data and code layers, nodes, pipelines, and the built-in visualizations.\n\nWe end the talk with sharing a recent project where we leveraged Kedro. Together with a Dutch horticulture company, we helped them optimize the energy cost mix in their greenhouses. We built and easily put into production an end-to-end Kedro optimization pipeline that demonstrates promising results of 10-15% energy cost reduction.\n\nThis presentation is aimed at data scientists, data engineers and machine-learning engineers alike that need to develop and deploy production-level data pipelines. No prior knowledge is required.", + "duration": 1953, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Ionut Barbu", + "Tim Brakenhoff" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/but_AZV0rhA/maxresdefault.jpg", + "title": "How to build production-ready data science pipelines with Kedro", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=but_AZV0rhA" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/jeroen-overschie-dataset-enrichment-using-llm-s.json b/pydata-eindhoven-2023/videos/jeroen-overschie-dataset-enrichment-using-llm-s.json new file mode 100644 index 000000000..3690a8a8e --- /dev/null +++ b/pydata-eindhoven-2023/videos/jeroen-overschie-dataset-enrichment-using-llm-s.json @@ -0,0 +1,24 @@ +{ + "description": "Welcome! Let's discover more about LLM's together. 
You will learn about how to tame the LLM to get the output you want in a playful way \ud83d\udc4f\ud83c\udffb.\n\nJoin us if you \ud83e\udef5:\n- Are interested in LLM's\n- Want to know how you can use LLM's to extract structured information from text\n- You know some Python\n\nContents of the talk \ud83d\udccc:\n[1 min] Intro\n[3 min] The current state of LLM's\n[3 min] Usecase: dataset with mixed structured/unstructured data\n[10 min] Prompt engineering\n[8 min] Going structured (JSON, Function Calling)\n[5 min] Conclusion & results\n\ud83c\udfe1 What you will take home\nAt the end of the talk, you will be taking home the following:\n- What LLM's are currently around and how the landscape looks like\n- How you can use a LLM to gather extract previously hidden features\n- How you should deal with LLM's deviating from the requested output protocol\n- What Function Calling is and how you can use it to your advantage\n- How different LLM's perform (GPT, PaLM, ...) for extracting data from the housing usecase \ud83c\udfe1\n\n\u2764\ufe0f Open Source Software\nThe Python libraries used to access the LLM and post-process the LLM results are openai and instructor, which are both Open Source. The LLM's used are hosted by cloud providers but could alternatively be swapped out for Open Source alternatives.\n\n\ud83c\udf92 Pre-requisites\nSome Python knowledge is recommended, but the talk can also be followed without! 
For the rest no previous knowledge about LLM's is required \u2713.", + "duration": 2061, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Jeroen Overschie" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/hJeMtABYkEQ/maxresdefault.jpg", + "title": "Dataset enrichment using LLM's ✨", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hJeMtABYkEQ" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/joris-k-building-fast-packages-faster-julia-as-a-backend-to-python-and-r.json b/pydata-eindhoven-2023/videos/joris-k-building-fast-packages-faster-julia-as-a-backend-to-python-and-r.json new file mode 100644 index 000000000..de6882929 --- /dev/null +++ b/pydata-eindhoven-2023/videos/joris-k-building-fast-packages-faster-julia-as-a-backend-to-python-and-r.json @@ -0,0 +1,24 @@ +{ + "description": "Using Julia instead of C or Fortran for package internals can have many advantages for Python and R users, since the language acts more similar to the higher level languages. We will discuss how to make this a reality by demonstrating how it was done for diffeqpy and diffeqr, packages in Python and R respectively that interface with Julia's DifferentialEquations.jl from the SciML organization. The way that it was done will be detailed in order to help developers replicate the format for other package types. 
We will discuss the performance advantages that can be seen from this arrangement, showcasing examples where Julia-based GPU-accelerated differential equation solvers achieve a 20x-100x acceleration over PyTorch and Jax.", + "duration": 2044, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Joris Kraak" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/JD8H5x8uSpM/maxresdefault.jpg", + "title": "Building Fast Packages Faster: Julia as a Backend to Python and R", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=JD8H5x8uSpM" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/maarten-breddels-keynote-solara-simplifies-building-complex-dashboards.json b/pydata-eindhoven-2023/videos/maarten-breddels-keynote-solara-simplifies-building-complex-dashboards.json new file mode 100644 index 000000000..f876cd64f --- /dev/null +++ b/pydata-eindhoven-2023/videos/maarten-breddels-keynote-solara-simplifies-building-complex-dashboards.json @@ -0,0 +1,24 @@ +{ + "description": "Many Python frameworks are suitable for creating basic dashboards, but struggle with more complex ones. Though many teams default to splitting into separate frontend and backend divisions when faced with increasing dashboard complexity, this approach introduces its own set of challenges, like reduced personnel interchangeability and cumbersome refactoring due to REST API changes.\n\nSolara, our new web framework, addresses these challenges. We use the foundational principles of ReactJS, yet maintain the ease of writing only Python. Solara has a declarative API, designed for dynamic and complex UI's, yet easy to write. Reactive variables power our state management which automatically trigger rerenders. 
Our component-centric architecture stimulates code reusability, and hot reloading promotes efficient workflows. Together with our rich set of UI and data-focused components, Solara spans the entire spectrum from rapid prototyping to robust, complex dashboards.\n\nWithout modification your application and components will work in Jupyter, Voil\u00e0 and on our standalone server for high scalability. Our server can run along existing FastAPI, Starlette, Flask and even Django servers to integrate with existing web services. We prioritize code quality and developer friendliness by including strong typing and first class support for unit and integration testing.", + "duration": 2669, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Maarten Breddels" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2MVUZV0icxU/maxresdefault.jpg", + "title": "Solara simplifies building complex dashboards.", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2MVUZV0icxU" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/marco-gorelli-polars-and-time-zones-everything-you-need-to-know.json b/pydata-eindhoven-2023/videos/marco-gorelli-polars-and-time-zones-everything-you-need-to-know.json new file mode 100644 index 000000000..dc7cc0f95 --- /dev/null +++ b/pydata-eindhoven-2023/videos/marco-gorelli-polars-and-time-zones-everything-you-need-to-know.json @@ -0,0 +1,24 @@ +{ + "description": "If you work with time series, you will inevitably come across time-zone-related issues, such as daylight saving time or time zone changes. The bad news is that these issues are incredibly difficult to deal with by hand. The good news is that Polars has full support for time zones built-in! 
By letting Polars handle time zones for you, your time series analysis will not only be blazingly fast, but also very rock-solid.\n\nTime zones may seem easy, but there are several subtleties to be aware of:\n- what's the difference between '1d' and '24h'?\n- how do you deal with DST (daylight-savings)?\n- what's the difference between a UTC-offset and a time zone?\n\nNo prior Polars knowledge is expected (though basic familiarity would be helpful).\n\nThe format of the talk will be:\n- 5 minutes: motivation, why should you never ever deal with time zones by hand?\n- 5 minutes: Polars crash course\n- 7.5 minutes: parsing datetimes in Polars (including time-zone-aware ones)\n- 7.5 minutes: time zones in other Polars operations (offsets, resampling, truncation, ...)\n- Q&A\n\nBy the end of the talk, you will have learned how to effectively leverage Polars when performing time series analysis involving time zones.", + "duration": 1810, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Marco Gorelli" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Qr0PnDox5MM/maxresdefault.jpg", + "title": "Polars and time zones: everything you need to know", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Qr0PnDox5MM" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/max-pumperla-building-deploying-llm-apps.json b/pydata-eindhoven-2023/videos/max-pumperla-building-deploying-llm-apps.json new file mode 100644 index 000000000..53336ad28 --- /dev/null +++ b/pydata-eindhoven-2023/videos/max-pumperla-building-deploying-llm-apps.json @@ -0,0 +1,24 @@ +{ + "description": "Large language models are all the rage, but building scalable applications with them can be costly and difficult. In this talk, we give you a glimpse at the emerging ecosystem of LLM apps beyond just ChatGPT. 
In particular, we focus on OSS alternatives, like the Llama model family, and show you how to use them in your own projects. We discuss how to leverage services like Anyscale Endpoints in Python to get LLM apps up and running quickly. To demonstrate this, we showcase two applications we built ourselves, namely a GitHub bot that helps you with your pull requests, and an \"Ask AI\" chatbot that we integrated into our project documentation.", + "duration": 2150, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Max Pumperla" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Gj2_knxjVOk/maxresdefault.jpg", + "title": "Building & Deploying LLM Apps", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Gj2_knxjVOk" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/maximilian-m-shaptivating-insights-unravelling-blackbox-ai-models.json b/pydata-eindhoven-2023/videos/maximilian-m-shaptivating-insights-unravelling-blackbox-ai-models.json new file mode 100644 index 000000000..19a826ab0 --- /dev/null +++ b/pydata-eindhoven-2023/videos/maximilian-m-shaptivating-insights-unravelling-blackbox-ai-models.json @@ -0,0 +1,24 @@ +{ + "description": "The advancements of (black box) artificial intelligence toolkits in recent years have made implementing AI models a commodity. However, model implementation is only the beginning during application development. Understanding, optimizing, and troubleshooting models is remaining as a constant challenge. In particular, the understanding (or explainability) of AI models is expected to become a requirement with the EU AI act, expected to pass this year.\n\nIn this presentation, SHAP (SHapley Additive exPlanations), a model agnostic AI explainability framework is explained using the example of a tabular classification problem in Python. 
First, we look at the theory behind SHAP and demonstrate the practical implementation in Python. Second, the usage of the SHAP framework is showcased for both global and local explainability. For this the filtering of bank transactions for suspicious activities is used as an example. It will be shown how SHAP was used to perform feature selection, understand the model\u2019s sensitivity to individual features and how single predictions can be explained. Last, a translation from SHAP to human readable output will be shown which was developed to explain the model predictions to the end user.\n\nTime breakdown:\n- General + domain introduction: 5 min\n- SHAP theory and Python framework: 5 min\n- Deep-dive into global and local explainability: 10 min\n- Converting SHAP values to human readable output: 5 min\n- Q&A: 5 min", + "duration": 2102, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Maximilian Messmer" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/uswpei2GpLE/maxresdefault.jpg", + "title": "SHAPtivating Insights: unravelling blackbox AI models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=uswpei2GpLE" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/mickey-beurskens-team-red-breaking-large-language-models.json b/pydata-eindhoven-2023/videos/mickey-beurskens-team-red-breaking-large-language-models.json new file mode 100644 index 000000000..ce7d8704c --- /dev/null +++ b/pydata-eindhoven-2023/videos/mickey-beurskens-team-red-breaking-large-language-models.json @@ -0,0 +1,24 @@ +{ + "description": "The best way to learn how to secure a system is to know how it breaks! In this talk we will take on the role of the Red Team, try to break Large Language Models using prompt injection, and learn about the kind of havoc we can cause. 
Learn how to safeguard your LLM applications by understanding their weaknesses.", + "duration": 1805, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Mickey Beurskens" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/F-UGqGsQvhc/maxresdefault.jpg", + "title": "Team Red: Breaking Large Language Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=F-UGqGsQvhc" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/patrick-de-oude-reinforcement-learning-for-food-waste-reduction-within-albert-heijn.json b/pydata-eindhoven-2023/videos/patrick-de-oude-reinforcement-learning-for-food-waste-reduction-within-albert-heijn.json new file mode 100644 index 000000000..39b07aeba --- /dev/null +++ b/pydata-eindhoven-2023/videos/patrick-de-oude-reinforcement-learning-for-food-waste-reduction-within-albert-heijn.json @@ -0,0 +1,24 @@ +{ + "description": "Food waste is a large global problem that has a huge impact on our environment. Currently, about one-third of the world\u2019s food goes to waste. In this talk we focus on an initiative to reduce food waste in a chain of supermarkets. The initiative applies increasing markdowns to products nearing their expiry date. The objective is to minimize food waste without significantly increasing the cost of the markdowns. This involves making a sequence of decisions about what markdown level to set over time while maximizing a cumulative expected future reward based on markdown costs and products destroyed.\n\nIn this talk we show how we tackle this problem with reinforcement learning. We model the problem using a discrete state-space and action set. 
We learn the policy from historical data using offline Q-learning.\n\nAdditionally, we show how we evaluate new policies using off-policy evaluation techniques and describe the limitations of these evaluation methods.\n\nLastly, we address the experimental setup used to measure the policies\u2019 true performance in production.\n\nAgenda:\n- 0-5 min: Introduction about food waste and dynamic markdown\n- 5-25 min: reinforcement learning\n- Implementation\n- Features & state space\n- Q-learning\n- Off-policy evaluation\n- Experiments\n- 25-30 min: Conclusions and future work\n- 30-35 min: Q&A\n\nTarget audience: anyone interested in the application of reinforcement learning, or technologies for food waste reduction. Familiarity with reinforcement learning will be useful.\n\nMain take-away: practical information around training, evaluating and deploying a simple reinforcement learning model.", + "duration": 1917, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Patrick de Oude" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/eT5Pc2uL6sw/maxresdefault.jpg", + "title": "Reinforcement learning for Food Waste Reduction within Albert Heijn", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eT5Pc2uL6sw" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/ritchie-vink-keynote-on-polars-plugins.json b/pydata-eindhoven-2023/videos/ritchie-vink-keynote-on-polars-plugins.json new file mode 100644 index 000000000..ea62238ad --- /dev/null +++ b/pydata-eindhoven-2023/videos/ritchie-vink-keynote-on-polars-plugins.json @@ -0,0 +1,26 @@ +{ + "description": "Machines have changed a lot in the last decade and Polars is a query engine that is written from scratch in Rust to benefit from the modern hardware. 
Effective parallelism, cache efficient data structures and algorithms are ingrained in its design. Thanks to those efforts Polars is among the fastest single node OSS query engines out there. Another goal of polars is rethinking the way DataFrames should be interacted with. Polars comes with a very declarative and versatile API that enables users to write readable code. This talk will focus on how Polars can be used and what you gain from using it idiomatically. Most importantly, this talk introduces Polars Plugins, a novel way to define your UDFs in Rust and register them in the main Python polars' engine. This will grant your specific business logic with - Rust performance (The familiar vectorization in python). - Parallelism (orchestrated by the polars engine) (No GIL Locking!) - Optimizations (The optimizer will use the properties of your UDF to elide work).", + "duration": 2961, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Ritchie Vink" + ], + "tags": [ + "Keynote" + ], + "thumbnail_url": "https://i.ytimg.com/vi/jKW-CBV7NUM/maxresdefault.jpg", + "title": "Keynote on Polars Plugins", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=jKW-CBV7NUM" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/ruben-arts-a-new-way-to-manage-python-environments-with-pixi.json b/pydata-eindhoven-2023/videos/ruben-arts-a-new-way-to-manage-python-environments-with-pixi.json new file mode 100644 index 000000000..3f0756ae6 --- /dev/null +++ b/pydata-eindhoven-2023/videos/ruben-arts-a-new-way-to-manage-python-environments-with-pixi.json @@ -0,0 +1,24 @@ +{ + "description": "pixi is changing the field for software environment management. 
pixi bridges the gap between conda and pip with a single binary tool that automatically generates lock-files and can be configured with a single configuration file. While we build on the conda ecosystem, we\u2019re also integrating PyPI (pip) into pixi. This way you\u2019ll only need one tool across all platforms.", + "duration": 1927, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Ruben Arts" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/P7CO25cx4CI/maxresdefault.jpg", + "title": "A New Way to Manage Python Environments with Pixi", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=P7CO25cx4CI" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/stephan-sahm-jolin-io-reactive-notebooks-for-python.json b/pydata-eindhoven-2023/videos/stephan-sahm-jolin-io-reactive-notebooks-for-python.json new file mode 100644 index 000000000..e3dbda440 --- /dev/null +++ b/pydata-eindhoven-2023/videos/stephan-sahm-jolin-io-reactive-notebooks-for-python.json @@ -0,0 +1,24 @@ +{ + "description": "You love using notebooks for data science? So do we!\n\nWe've built everything you need to seamlessly transition from experimenting and developing in a notebook to automated testing and finally deploying it for production. You can transform your notebook into a dashboard, a real-time process, perform big data analysis (coming soon), or create a self-updating report.\n\nEverything is still just a notebook. 
It's scientists-friendly, promotes collaboration, and is ready for both you and your company.\n\nStart right away at cloud.jolin.io or reach out to us for a custom deployment within your infrastructure.", + "duration": 2025, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Stephan Sahm" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/0uURjA-_X4k/maxresdefault.jpg", + "title": "Jolin.io - Reactive Notebooks for Python", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=0uURjA-_X4k" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/stephan-sahm-reactive-notebooks-for-python.json b/pydata-eindhoven-2023/videos/stephan-sahm-reactive-notebooks-for-python.json new file mode 100644 index 000000000..37e772657 --- /dev/null +++ b/pydata-eindhoven-2023/videos/stephan-sahm-reactive-notebooks-for-python.json @@ -0,0 +1,24 @@ +{ + "description": "Jupyter Notebooks revolutionized Data Science - Reactive Notebooks build on top of this giant. Reactivity solves a couple of problems:\n\nImproved safety: It is impossible to get into an invalid state (where you would hit \"rerun all\" in Jupyter).\n\nSimplified interactivity: Changing a value also updates those cells which depend on it.\n\nSimplified deployment: The notebook itself is already a dashboard (with fully customizable html frontend).\n\nIn addition, the new notebook allows for self-updates, i.e. listening for some remote events and rerunning notebook cells automatically with the new updated value. 
This upgrades your notebook to a real-time analytics process.\n\nIn this talk I will present the new notebook and go through all its powerful features.", + "duration": 2160, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Stephan Sahm" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/lleuSqm83aQ/maxresdefault.jpg", + "title": "Reactive Notebooks for Python", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=lleuSqm83aQ" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/thomas-woudsma-structuring-automated-visual-inspections-with-django-and-grpc.json b/pydata-eindhoven-2023/videos/thomas-woudsma-structuring-automated-visual-inspections-with-django-and-grpc.json new file mode 100644 index 000000000..8ce143e4e --- /dev/null +++ b/pydata-eindhoven-2023/videos/thomas-woudsma-structuring-automated-visual-inspections-with-django-and-grpc.json @@ -0,0 +1,24 @@ +{ + "description": "Setting up an ML application by creating some ad-hoc dataset and training a model with an unversioned Python script just does not cut it anymore. The MLOps process helps to structure the development and life cycle of any ML application to make sure that data is traceable, and performance is reproducible. In this talk, an automated visual inspection application is used as an example to show a data definition and labeling platform using Django, and an image ingress and streaming service using gRPC. This system can serve as an example for any future ML application as the same principles can be applied. A demonstration of this system is also shown at the end of the session. 
Having some basic knowledge of Django and gRPC is preferred, but it is not required.", + "duration": 1976, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Thomas Woudsma" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/EkazYzD3RkM/maxresdefault.jpg", + "title": "Structuring automated visual inspections with Django and gRPC", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=EkazYzD3RkM" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/vera-van-der-lelij-adjusting-2d-prediction-models-to-be-usable-for-3d-objects.json b/pydata-eindhoven-2023/videos/vera-van-der-lelij-adjusting-2d-prediction-models-to-be-usable-for-3d-objects.json new file mode 100644 index 000000000..557f1adaa --- /dev/null +++ b/pydata-eindhoven-2023/videos/vera-van-der-lelij-adjusting-2d-prediction-models-to-be-usable-for-3d-objects.json @@ -0,0 +1,24 @@ +{ + "description": "With the rise of 3D printing/scanning, a significant increase in available 3D datasets has taken place. However, most 3D research focusses on classification, recognition or shape analysis of 3D objects. For the past 1.5 years we have researched the subject of predicting growth of natural growing 3D objects using machine learning techniques. During this talk we would like to share our findings and the challenges we encountered when working on this research. 
The presentation will be structured as follows:\n\nIntroduction and use case (5 minutes)\nBasic 3D-techniques using open3d package (5 minutes)\nSolution method (10 minutes)\nSummary and future research (5 minutes)\nQ&A (5 minutes)", + "duration": 2102, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Vera van der Lelij" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/_7XRmuApGmE/maxresdefault.jpg", + "title": "Adjusting 2D prediction models to be usable for 3D objects", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=_7XRmuApGmE" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/vincent-d-warmerdam-active-teaching-human-learning.json b/pydata-eindhoven-2023/videos/vincent-d-warmerdam-active-teaching-human-learning.json new file mode 100644 index 000000000..8b73ad715 --- /dev/null +++ b/pydata-eindhoven-2023/videos/vincent-d-warmerdam-active-teaching-human-learning.json @@ -0,0 +1,24 @@ +{ + "description": "Want a dataset for ML? Internet says you should use ... active learning!\n\nIt's not a bad idea. When you're creating your own training data you typically want to focus on examples that can teach a machine learning algorithm the most. That's why active learning techniques typically fetch examples with the lowest confidence scores to annotate first. The thinking is that low confidence regions represent the areas where the algorithm might learn more than regions where the algorithm seems sure of itself.\n\nAgain, it's not a bad idea. But it's an approach that can be improved by rethinking some parts. 
Maybe it would be better for the human to understand the mistakes that the model makes and use this information to actively teach the model on how to improve.\n\nThis talk is all about exploring this idea.", + "duration": 2245, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Vincent D. Warmerdam" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Z2NxN9sl9Vk/maxresdefault.jpg", + "title": "Active Teaching, Human Learning", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Z2NxN9sl9Vk" + } + ] +} diff --git a/pydata-eindhoven-2023/videos/vladimir-osin-taming-the-machine-basics-of-ml-models-training-and-inference-optimization.json b/pydata-eindhoven-2023/videos/vladimir-osin-taming-the-machine-basics-of-ml-models-training-and-inference-optimization.json new file mode 100644 index 000000000..22f1053b6 --- /dev/null +++ b/pydata-eindhoven-2023/videos/vladimir-osin-taming-the-machine-basics-of-ml-models-training-and-inference-optimization.json @@ -0,0 +1,24 @@ +{ + "description": "This introductory talk is designed to address the prevalent industry challenge of Machine Learning (ML) model deployment. Given the plethora of frameworks, compilers, and runtimes, ML engineers and Data Scientists often find this a daunting task. Our discussion will traverse all phases of this process, aiming to simplify the complexity.\n\nWe'll initiate the discussion with the ecosystem, tools, and methods, focusing initially on the training stage before transitioning to the inference stage. Beginning with a model in Jupyter Notebook, we'll illustrate how to expedite training time utilizing auto mixed precision, multiple GPUs, batch-size strategies, and JIT compilation. 
Subsequently, we'll concentrate on how to fine-tune your model inference for specific target hardware using necessary tools (NVIDIA TensorRT (LLM) and Triton, ONNX) or adopting a more comprehensive approach with the new Mojo programming language.\n\nTarget Audience\nOur primary audience is Machine Learning Engineers, Data Scientists, Software Engineers, and students aspiring to these roles. A basic understanding of Python and a healthy dose of enthusiasm are the sole prerequisites for this seminar.\n\nAgenda\n0-5 mins: Introduction to the Ecosystem and Tooling\n5-15 mins: Strategies to Accelerate Model Training\n15-25 mins: Techniques to Boost Model Inference\n25-30 mins: Concluding Remarks and Future Directions", + "duration": 1891, + "language": "eng", + "recorded": "2023-11-30", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023" + } + ], + "speakers": [ + "Vladimir Osin" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/94gShEYPCCQ/maxresdefault.jpg", + "title": "Taming the Machine: Basics of ML Models Training and Inference Optimization", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=94gShEYPCCQ" + } + ] +}