|
27 | 27 | { |
28 | 28 | "cell_type": "code", |
29 | 29 | "execution_count": 1, |
30 | | - "metadata": {}, |
| 30 | + "metadata": { |
| 31 | + "execution": { |
| 32 | + "iopub.execute_input": "2025-11-18T22:31:17.573145Z", |
| 33 | + "iopub.status.busy": "2025-11-18T22:31:17.572855Z", |
| 34 | + "iopub.status.idle": "2025-11-18T22:31:18.092601Z", |
| 35 | + "shell.execute_reply": "2025-11-18T22:31:18.092331Z", |
| 36 | + "shell.execute_reply.started": "2025-11-18T22:31:17.573125Z" |
| 37 | + } |
| 38 | + }, |
31 | 39 | "outputs": [], |
32 | 40 | "source": [ |
33 | 41 | "import logging\n", |
|
62 | 70 | { |
63 | 71 | "cell_type": "code", |
64 | 72 | "execution_count": 2, |
65 | | - "metadata": {}, |
| 73 | + "metadata": { |
| 74 | + "execution": { |
| 75 | + "iopub.execute_input": "2025-11-18T22:31:18.093348Z", |
| 76 | + "iopub.status.busy": "2025-11-18T22:31:18.093184Z", |
| 77 | + "iopub.status.idle": "2025-11-18T22:31:18.095343Z", |
| 78 | + "shell.execute_reply": "2025-11-18T22:31:18.094962Z", |
| 79 | + "shell.execute_reply.started": "2025-11-18T22:31:18.093338Z" |
| 80 | + } |
| 81 | + }, |
66 | 82 | "outputs": [], |
67 | 83 | "source": [ |
68 | 84 | "logger = logging.getLogger(0)\n", |
|
80 | 96 | { |
81 | 97 | "cell_type": "code", |
82 | 98 | "execution_count": 3, |
83 | | - "metadata": {}, |
| 99 | + "metadata": { |
| 100 | + "execution": { |
| 101 | + "iopub.execute_input": "2025-11-18T22:31:18.095921Z", |
| 102 | + "iopub.status.busy": "2025-11-18T22:31:18.095827Z", |
| 103 | + "iopub.status.idle": "2025-11-18T22:31:18.425513Z", |
| 104 | + "shell.execute_reply": "2025-11-18T22:31:18.424671Z", |
| 105 | + "shell.execute_reply.started": "2025-11-18T22:31:18.095914Z" |
| 106 | + } |
| 107 | + }, |
84 | 108 | "outputs": [], |
85 | 109 | "source": [ |
86 | 110 | "sales = pd.read_csv('https://raw.githubusercontent.com/kjam/data-cleaning-101/master/data/sales_data.csv')" |
|
96 | 120 | { |
97 | 121 | "cell_type": "code", |
98 | 122 | "execution_count": 4, |
99 | | - "metadata": {}, |
| 123 | + "metadata": { |
| 124 | + "execution": { |
| 125 | + "iopub.execute_input": "2025-11-18T22:31:18.426847Z", |
| 126 | + "iopub.status.busy": "2025-11-18T22:31:18.426587Z", |
| 127 | + "iopub.status.idle": "2025-11-18T22:31:18.442451Z", |
| 128 | + "shell.execute_reply": "2025-11-18T22:31:18.441991Z", |
| 129 | + "shell.execute_reply.started": "2025-11-18T22:31:18.426831Z" |
| 130 | + } |
| 131 | + }, |
100 | 132 | "outputs": [ |
101 | 133 | { |
102 | 134 | "data": { |
|
211 | 243 | { |
212 | 244 | "cell_type": "code", |
213 | 245 | "execution_count": 5, |
214 | | - "metadata": {}, |
| 246 | + "metadata": { |
| 247 | + "execution": { |
| 248 | + "iopub.execute_input": "2025-11-18T22:31:18.443750Z", |
| 249 | + "iopub.status.busy": "2025-11-18T22:31:18.443370Z", |
| 250 | + "iopub.status.idle": "2025-11-18T22:31:18.448128Z", |
| 251 | + "shell.execute_reply": "2025-11-18T22:31:18.447641Z", |
| 252 | + "shell.execute_reply.started": "2025-11-18T22:31:18.443732Z" |
| 253 | + } |
| 254 | + }, |
215 | 255 | "outputs": [ |
216 | 256 | { |
217 | 257 | "data": { |
|
247 | 287 | { |
248 | 288 | "cell_type": "code", |
249 | 289 | "execution_count": 6, |
250 | | - "metadata": {}, |
| 290 | + "metadata": { |
| 291 | + "execution": { |
| 292 | + "iopub.execute_input": "2025-11-18T22:31:18.450998Z", |
| 293 | + "iopub.status.busy": "2025-11-18T22:31:18.450836Z", |
| 294 | + "iopub.status.idle": "2025-11-18T22:31:18.453121Z", |
| 295 | + "shell.execute_reply": "2025-11-18T22:31:18.452782Z", |
| 296 | + "shell.execute_reply.started": "2025-11-18T22:31:18.450982Z" |
| 297 | + } |
| 298 | + }, |
251 | 299 | "outputs": [], |
252 | 300 | "source": [ |
253 | 301 | "schema = Schema(\n", |
|
261 | 309 | { |
262 | 310 | "cell_type": "code", |
263 | 311 | "execution_count": 7, |
264 | | - "metadata": {}, |
| 312 | + "metadata": { |
| 313 | + "execution": { |
| 314 | + "iopub.execute_input": "2025-11-18T22:31:18.453628Z", |
| 315 | + "iopub.status.busy": "2025-11-18T22:31:18.453552Z", |
| 316 | + "iopub.status.idle": "2025-11-18T22:31:18.478857Z", |
| 317 | + "shell.execute_reply": "2025-11-18T22:31:18.478488Z", |
| 318 | + "shell.execute_reply.started": "2025-11-18T22:31:18.453621Z" |
| 319 | + } |
| 320 | + }, |
265 | 321 | "outputs": [ |
266 | 322 | { |
267 | 323 | "name": "stderr", |
|
361 | 417 | { |
362 | 418 | "cell_type": "code", |
363 | 419 | "execution_count": 8, |
364 | | - "metadata": {}, |
| 420 | + "metadata": { |
| 421 | + "execution": { |
| 422 | + "iopub.execute_input": "2025-11-18T22:31:18.479687Z", |
| 423 | + "iopub.status.busy": "2025-11-18T22:31:18.479594Z", |
| 424 | + "iopub.status.idle": "2025-11-18T22:31:18.482224Z", |
| 425 | + "shell.execute_reply": "2025-11-18T22:31:18.481893Z", |
| 426 | + "shell.execute_reply.started": "2025-11-18T22:31:18.479678Z" |
| 427 | + } |
| 428 | + }, |
365 | 429 | "outputs": [ |
366 | 430 | { |
367 | 431 | "data": { |
|
399 | 463 | { |
400 | 464 | "cell_type": "code", |
401 | 465 | "execution_count": 9, |
402 | | - "metadata": {}, |
| 466 | + "metadata": { |
| 467 | + "execution": { |
| 468 | + "iopub.execute_input": "2025-11-18T22:31:18.482964Z", |
| 469 | + "iopub.status.busy": "2025-11-18T22:31:18.482903Z", |
| 470 | + "iopub.status.idle": "2025-11-18T22:31:18.484745Z", |
| 471 | + "shell.execute_reply": "2025-11-18T22:31:18.484447Z", |
| 472 | + "shell.execute_reply.started": "2025-11-18T22:31:18.482958Z" |
| 473 | + } |
| 474 | + }, |
403 | 475 | "outputs": [], |
404 | 476 | "source": [ |
405 | | - "def ValidDate(fmt=\"%Y-%m-%d %H:%M:%S\"):\n", |
| 477 | + "def valid_date(fmt=\"%Y-%m-%d %H:%M:%S\"):\n", |
406 | 478 | " return lambda v: datetime.strptime(v, fmt)" |
407 | 479 | ] |
408 | 480 | }, |
409 | 481 | { |
410 | 482 | "cell_type": "code", |
411 | 483 | "execution_count": 10, |
412 | | - "metadata": {}, |
| 484 | + "metadata": { |
| 485 | + "execution": { |
| 486 | + "iopub.execute_input": "2025-11-18T22:31:18.485394Z", |
| 487 | + "iopub.status.busy": "2025-11-18T22:31:18.485245Z", |
| 488 | + "iopub.status.idle": "2025-11-18T22:31:18.487196Z", |
| 489 | + "shell.execute_reply": "2025-11-18T22:31:18.486926Z", |
| 490 | + "shell.execute_reply.started": "2025-11-18T22:31:18.485388Z" |
| 491 | + } |
| 492 | + }, |
413 | 493 | "outputs": [], |
414 | 494 | "source": [ |
415 | 495 | "schema = Schema(\n", |
416 | 496 | " {\n", |
417 | | - " Required(\"timestamp\"): All(ValidDate()),\n", |
| 497 | + " Required(\"timestamp\"): All(valid_date()),\n", |
418 | 498 | " },\n", |
419 | | - " extra=ALLOW_EXTRA,\n", |
| 499 | + " extra = ALLOW_EXTRA,\n", |
420 | 500 | ")" |
421 | 501 | ] |
422 | 502 | }, |
423 | 503 | { |
424 | 504 | "cell_type": "code", |
425 | 505 | "execution_count": 11, |
426 | | - "metadata": {}, |
| 506 | + "metadata": { |
| 507 | + "execution": { |
| 508 | + "iopub.execute_input": "2025-11-18T22:31:18.487668Z", |
| 509 | + "iopub.status.busy": "2025-11-18T22:31:18.487587Z", |
| 510 | + "iopub.status.idle": "2025-11-18T22:31:18.494464Z", |
| 511 | + "shell.execute_reply": "2025-11-18T22:31:18.494239Z", |
| 512 | + "shell.execute_reply.started": "2025-11-18T22:31:18.487661Z" |
| 513 | + } |
| 514 | + }, |
427 | 515 | "outputs": [], |
428 | 516 | "source": [ |
429 | 517 | "error_count = 0\n", |
|
440 | 528 | { |
441 | 529 | "cell_type": "code", |
442 | 530 | "execution_count": 12, |
443 | | - "metadata": {}, |
| 531 | + "metadata": { |
| 532 | + "execution": { |
| 533 | + "iopub.execute_input": "2025-11-18T22:31:18.495261Z", |
| 534 | + "iopub.status.busy": "2025-11-18T22:31:18.495136Z", |
| 535 | + "iopub.status.idle": "2025-11-18T22:31:18.497949Z", |
| 536 | + "shell.execute_reply": "2025-11-18T22:31:18.497655Z", |
| 537 | + "shell.execute_reply.started": "2025-11-18T22:31:18.495250Z" |
| 538 | + } |
| 539 | + }, |
444 | 540 | "outputs": [ |
445 | 541 | { |
446 | 542 | "data": { |
|
467 | 563 | { |
468 | 564 | "cell_type": "code", |
469 | 565 | "execution_count": 13, |
470 | | - "metadata": {}, |
| 566 | + "metadata": { |
| 567 | + "execution": { |
| 568 | + "iopub.execute_input": "2025-11-18T22:31:18.498347Z", |
| 569 | + "iopub.status.busy": "2025-11-18T22:31:18.498271Z", |
| 570 | + "iopub.status.idle": "2025-11-18T22:31:18.500405Z", |
| 571 | + "shell.execute_reply": "2025-11-18T22:31:18.500183Z", |
| 572 | + "shell.execute_reply.started": "2025-11-18T22:31:18.498341Z" |
| 573 | + } |
| 574 | + }, |
471 | 575 | "outputs": [], |
472 | 576 | "source": [ |
473 | | - "def ValidDate(fmt=\"%Y-%m-%d %H:%M:%S\"):\n", |
| 577 | + "def valid_date(fmt=\"%Y-%m-%d %H:%M:%S\"):\n", |
474 | 578 | " def validation_func(v):\n", |
475 | 579 | " try:\n", |
476 | 580 | " assert datetime.strptime(v, fmt) <= datetime.now()\n", |
|
483 | 587 | { |
484 | 588 | "cell_type": "code", |
485 | 589 | "execution_count": 14, |
486 | | - "metadata": {}, |
| 590 | + "metadata": { |
| 591 | + "execution": { |
| 592 | + "iopub.execute_input": "2025-11-18T22:31:18.500793Z", |
| 593 | + "iopub.status.busy": "2025-11-18T22:31:18.500676Z", |
| 594 | + "iopub.status.idle": "2025-11-18T22:31:18.502440Z", |
| 595 | + "shell.execute_reply": "2025-11-18T22:31:18.502172Z", |
| 596 | + "shell.execute_reply.started": "2025-11-18T22:31:18.500787Z" |
| 597 | + } |
| 598 | + }, |
487 | 599 | "outputs": [], |
488 | 600 | "source": [ |
489 | 601 | "schema = Schema(\n", |
490 | 602 | " {\n", |
491 | | - " Required(\"timestamp\"): All(ValidDate()),\n", |
| 603 | + " Required(\"timestamp\"): All(valid_date()),\n", |
492 | 604 | " },\n", |
493 | | - " extra=ALLOW_EXTRA,\n", |
| 605 | + " extra = ALLOW_EXTRA,\n", |
494 | 606 | ")" |
495 | 607 | ] |
496 | 608 | }, |
497 | 609 | { |
498 | 610 | "cell_type": "code", |
499 | 611 | "execution_count": 15, |
500 | | - "metadata": {}, |
| 612 | + "metadata": { |
| 613 | + "execution": { |
| 614 | + "iopub.execute_input": "2025-11-18T22:31:18.502983Z", |
| 615 | + "iopub.status.busy": "2025-11-18T22:31:18.502885Z", |
| 616 | + "iopub.status.idle": "2025-11-18T22:31:18.509278Z", |
| 617 | + "shell.execute_reply": "2025-11-18T22:31:18.508890Z", |
| 618 | + "shell.execute_reply.started": "2025-11-18T22:31:18.502974Z" |
| 619 | + } |
| 620 | + }, |
501 | 621 | "outputs": [], |
502 | 622 | "source": [ |
503 | 623 | "error_count = 0\n", |
|
514 | 634 | { |
515 | 635 | "cell_type": "code", |
516 | 636 | "execution_count": 16, |
517 | | - "metadata": {}, |
| 637 | + "metadata": { |
| 638 | + "execution": { |
| 639 | + "iopub.execute_input": "2025-11-18T22:31:18.509964Z", |
| 640 | + "iopub.status.busy": "2025-11-18T22:31:18.509861Z", |
| 641 | + "iopub.status.idle": "2025-11-18T22:31:18.512235Z", |
| 642 | + "shell.execute_reply": "2025-11-18T22:31:18.511993Z", |
| 643 | + "shell.execute_reply.started": "2025-11-18T22:31:18.509949Z" |
| 644 | + } |
| 645 | + }, |
518 | 646 | "outputs": [ |
519 | 647 | { |
520 | 648 | "data": { |
|
548 | 676 | "name": "python", |
549 | 677 | "nbconvert_exporter": "python", |
550 | 678 | "pygments_lexer": "ipython3", |
551 | | - "version": "3.11.4" |
| 679 | + "version": "3.11.13" |
552 | 680 | }, |
553 | 681 | "latex_envs": { |
554 | 682 | "LaTeX_envs_menu_present": true, |
|
0 commit comments