{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state":{}, "a0c6705f4fad4f519897e05a5de06a94": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d449a38929414a4cb4bebb2e01fbaf44", "IPY_MODEL_eebed51a9c8b41939c686c261acc7e8a", "IPY_MODEL_0f3a830648d8410894aa3dc47227992f" ], "layout": "IPY_MODEL_07a1742aee284bf5834ca77a5f03d3ee" } }, "d449a38929414a4cb4bebb2e01fbaf44": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_789e5125b32f4a8390cff308983b878c", "placeholder": "​", "style": "IPY_MODEL_88eebcdcb5eb45bfbe040c8ccbf87411", "value": "Crawler.zip: 100%" } }, "eebed51a9c8b41939c686c261acc7e8a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_922f135bb50f4144ab0d6abad0df9e69", "max": 439926114, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_68b6bed833c74ccea239002b23c404a4", "value": 439926114 } }, "0f3a830648d8410894aa3dc47227992f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3ab181bde71d481fa20d3bd5c5d30803", "placeholder": "​", "style": "IPY_MODEL_5768e9e182a04c08b4e8887bc4bda5f1", "value": " 440M/440M [00:04<00:00, 91.5MB/s]" } }, "07a1742aee284bf5834ca77a5f03d3ee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"789e5125b32f4a8390cff308983b878c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "88eebcdcb5eb45bfbe040c8ccbf87411": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "922f135bb50f4144ab0d6abad0df9e69": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "68b6bed833c74ccea239002b23c404a4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3ab181bde71d481fa20d3bd5c5d30803": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5768e9e182a04c08b4e8887bc4bda5f1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "268c8a22fb2a4d2eab9c3e25de984221": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2d2b4736f11c433585b752139dd70dc9", "IPY_MODEL_80d59424f44a4b199008e0d16343f4b0", "IPY_MODEL_ed3db2b25f0e45a7a9cdf4405b1a87a1" ], "layout": "IPY_MODEL_df1e6015d489417485e3a28bbbd54728" } }, "2d2b4736f11c433585b752139dd70dc9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f2c35ce7b4264cfa80ace120c2a8c2f9", "placeholder": "​", "style": "IPY_MODEL_25469448f7d6428ca97a96f9c9f156b7", "value": 
"microsoft-learn.jsonl: 100%" } }, "80d59424f44a4b199008e0d16343f4b0": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6586ec5c75454671b85bd4659fd9a40e", "max": 270257457, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_11273ae865b44f2b8d3a4363b3fa1178", "value": 270257457 } }, "ed3db2b25f0e45a7a9cdf4405b1a87a1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e36a5f615ae948148aa1222b0ddc514b", "placeholder": "​", "style": "IPY_MODEL_e64d5d3198634727a6a9f3634fb4afb8", "value": " 270M/270M [00:07<00:00, 40.9MB/s]" } }, "df1e6015d489417485e3a28bbbd54728": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, 
"grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f2c35ce7b4264cfa80ace120c2a8c2f9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25469448f7d6428ca97a96f9c9f156b7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6586ec5c75454671b85bd4659fd9a40e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "11273ae865b44f2b8d3a4363b3fa1178": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e36a5f615ae948148aa1222b0ddc514b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e64d5d3198634727a6a9f3634fb4afb8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "079339c01ce24e8ab8a61d24b36ee4e8": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_888a7e6f633746be94943016dc9f7c56", "IPY_MODEL_b41df158b85c4222ba2d65279337105d", "IPY_MODEL_3bc4979197e94257a83d7f0deeafa43a" ], "layout": "IPY_MODEL_2ec303b4d09d41ed83102edec73026f0" } }, "888a7e6f633746be94943016dc9f7c56": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6b6b1bdbd8f64ad4b458bede86f7f3c6", "placeholder": "​", "style": "IPY_MODEL_11f49e72f98c4d6aa69247d0e1661a2b", "value": "tech-community.jsonl: 100%" } }, "b41df158b85c4222ba2d65279337105d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bf34ba8bf7bc4dbcb7152ecb22229c78", "max": 2136201679, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_06f2266f798544adb127055305f1671f", "value": 2136201679 } }, "3bc4979197e94257a83d7f0deeafa43a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_328ccb5aaad64381acf906d3f2f39434", "placeholder": "​", "style": "IPY_MODEL_1a556d4a74b44803a88eb88b01f78abf", "value": " 2.14G/2.14G [00:58<00:00, 52.8MB/s]" } }, "2ec303b4d09d41ed83102edec73026f0": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b6b1bdbd8f64ad4b458bede86f7f3c6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, 
"object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "11f49e72f98c4d6aa69247d0e1661a2b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "bf34ba8bf7bc4dbcb7152ecb22229c78": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "06f2266f798544adb127055305f1671f": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "328ccb5aaad64381acf906d3f2f39434": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1a556d4a74b44803a88eb88b01f78abf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e954c157384742568aeb1783495a652a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6265a7718d4d472f895fa8a55de6facf", "IPY_MODEL_6d550f52e424492f8928886802018388", "IPY_MODEL_ae40a367b9af4d9f8fdd96cbdad2cf27" ], "layout": "IPY_MODEL_e927f83749c6446d8734bb2de88e429b" } }, "6265a7718d4d472f895fa8a55de6facf": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cebf15f03d694f7ca7a38febf76cb054", "placeholder": "​", "style": "IPY_MODEL_cc5663d199724c83b893e3ce965d466d", "value": "azure-updates.jsonl: 100%" } }, "6d550f52e424492f8928886802018388": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f01c1e76140e4797bc79e0880db73304", "max": 18574751, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_34ff77d08c3d43b98d2d3855543a89dd", "value": 18574751 } }, "ae40a367b9af4d9f8fdd96cbdad2cf27": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f08ea4973a31409faf9c07751b5d5dd3", "placeholder": "​", "style": "IPY_MODEL_790fb4ae984446d9bdeec2a2876807ff", "value": " 18.6M/18.6M [00:00<00:00, 76.6MB/s]" } }, "e927f83749c6446d8734bb2de88e429b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cebf15f03d694f7ca7a38febf76cb054": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cc5663d199724c83b893e3ce965d466d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f01c1e76140e4797bc79e0880db73304": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "34ff77d08c3d43b98d2d3855543a89dd": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f08ea4973a31409faf9c07751b5d5dd3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "790fb4ae984446d9bdeec2a2876807ff": { "model_module": "@jupyter-widgets/controls", 
"model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "185ded41057340a08a5bccc551798f1a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_5d1143e67464431aa5a96d4c01ae9b73", "IPY_MODEL_d29703f8bf6d49188cb5a2bd3a5629eb", "IPY_MODEL_bdc511cd9dcf44f09af118fa11b9c5c3" ], "layout": "IPY_MODEL_ed8fadcdcde4412b98c68e8d1924eaf0" } }, "5d1143e67464431aa5a96d4c01ae9b73": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9e3eca47a25f43e69bcf0c9fd3962cae", "placeholder": "​", "style": "IPY_MODEL_bac81645f79848c8a7fc30e21ba6fcce", "value": "github-samples.jsonl: 100%" } }, "d29703f8bf6d49188cb5a2bd3a5629eb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", 
"_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2f03e86202bd4fc9b2d13e67ee0c7447", "max": 171553392, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_eb2f160c34814ff1b7aa8a1ad3d42edb", "value": 171553392 } }, "bdc511cd9dcf44f09af118fa11b9c5c3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3c08de811fbb422eb70ea751acb2e13d", "placeholder": "​", "style": "IPY_MODEL_1626f1da378045399f650e6fb1ba0429", "value": " 172M/172M [00:04<00:00, 39.9MB/s]" } }, "ed8fadcdcde4412b98c68e8d1924eaf0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, 
"overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9e3eca47a25f43e69bcf0c9fd3962cae": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bac81645f79848c8a7fc30e21ba6fcce": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2f03e86202bd4fc9b2d13e67ee0c7447": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eb2f160c34814ff1b7aa8a1ad3d42edb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3c08de811fbb422eb70ea751acb2e13d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": 
null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1626f1da378045399f650e6fb1ba0429": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f2aef948c3e248bca2a6b4a84d8534b5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e8e7a3c6cac44588a44ec5f4bd3c2771", "IPY_MODEL_8cd41c5175c546b0a576d7f1f07b31f0", "IPY_MODEL_fdd2a5d14b5045e6b3ff9f585c12c1b1" ], "layout": "IPY_MODEL_c74f16c144274bfc81f5484c9d5abf84" } }, "e8e7a3c6cac44588a44ec5f4bd3c2771": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_6663b46404c447ec82eb7b755f46e3f4", "placeholder": "​", "style": "IPY_MODEL_dcdc3d1d68f14c34931e1d257590ca70", "value": "azure-architecture.jsonl: 100%" } }, "8cd41c5175c546b0a576d7f1f07b31f0": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6567ebb2bce04053812751d9bb0db2da", "max": 15568877, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_bbc373b2b34a447c85bc4983b4d62390", "value": 15568877 } }, "fdd2a5d14b5045e6b3ff9f585c12c1b1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_93590d191413459e878207a0176393be", "placeholder": "​", "style": "IPY_MODEL_0a674086b9244ba6ac5400b10c3b7a69", "value": " 15.6M/15.6M [00:00<00:00, 70.2MB/s]" } }, "c74f16c144274bfc81f5484c9d5abf84": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": 
null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6663b46404c447ec82eb7b755f46e3f4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dcdc3d1d68f14c34931e1d257590ca70": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6567ebb2bce04053812751d9bb0db2da": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bbc373b2b34a447c85bc4983b4d62390": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "93590d191413459e878207a0176393be": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0a674086b9244ba6ac5400b10c3b7a69": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "9QUghTFp3gSE", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "0ffbdf26-4600-408e-ceb4-9c7191d71066" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/454.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m450.6/454.8 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta 
\u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.8/454.8 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m266.8/266.8 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.2/304.2 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.9/50.9 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.3/129.3 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip install -q llama-index==0.12.12 openai==1.59.6 tiktoken==0.8.0" ] }, { "cell_type": "code", "source": [ "# set variables\n", "from google.colab import userdata\n", "\n", "HF_TOKEN = userdata.get('HF_TOKEN2')" ], "metadata": { "id": "hxGaRPQc3vrZ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Initialise HG\n", "\n", "from huggingface_hub import HfApi\n", "\n", "api = HfApi(token=HF_TOKEN)" ], "metadata": { "id": "87eDAI2w34d4" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ " # Download files\n", "file_name = 
'Crawler.zip'\n", "api.hf_hub_download(\n", " filename=file_name,\n", " local_dir=\"./data\",\n", " repo_id=\"vicpada/AzureResources\",\n", " repo_type=\"dataset\"\n", ")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 67, "referenced_widgets": [ "a0c6705f4fad4f519897e05a5de06a94", "d449a38929414a4cb4bebb2e01fbaf44", "eebed51a9c8b41939c686c261acc7e8a", "0f3a830648d8410894aa3dc47227992f", "07a1742aee284bf5834ca77a5f03d3ee", "789e5125b32f4a8390cff308983b878c", "88eebcdcb5eb45bfbe040c8ccbf87411", "922f135bb50f4144ab0d6abad0df9e69", "68b6bed833c74ccea239002b23c404a4", "3ab181bde71d481fa20d3bd5c5d30803", "5768e9e182a04c08b4e8887bc4bda5f1" ] }, "id": "N45xj1ic4ENn", "outputId": "627e0d3c-38a3-4290-d4d0-e66dbaf72f6e" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Crawler.zip: 0%| | 0.00/440M [00:00 int:\n", " encoding = tiktoken.get_encoding(encoding_name)\n", " num_tokens = len(encoding.encode(string, disallowed_special=()))\n", " return num_tokens\n", "\n", "def remove_copyright_header(content: str) -> str:\n", " header_pattern = re.compile(r\"\\s*\", re.DOTALL)\n", " cleaned_content = header_pattern.sub(\"\", content, count=1)\n", " return cleaned_content.strip()\n", "\n", "def remove_url_and_title_header(content: str) -> str:\n", " header_pattern = re.compile(r\"(?s)^---\\s*(?=.*\\b(url|title):).*?---\\s*\\n*\", re.DOTALL)\n", " cleaned_content = header_pattern.sub(\"\", content, count=1)\n", " return cleaned_content.strip()\n", "\n", "\n", "\n", "def process_files(folder_name:str, folder_path:str, files:List) -> List[dict[str, str]]:\n", " jsonl_data = []\n", " for file_name in files:\n", " file_path = os.path.join(folder_path, file_name)\n", " with open(file_path, 'r', encoding='utf-8') as infile:\n", " content = infile.read()\n", "\n", " # Create a Json object\n", " title = extract_title(content)\n", " token_count = num_tokens_from_string(content, \"cl100k_base\")\n", "\n", " # 
Extract URL and Title, handling potential None results from re.search\n",
"        url_match = re.search(r'^url:\\s*\"([^\"]+)\"', content, re.MULTILINE)\n",
"        extracted_url = url_match.group(1) if url_match else None\n",
"\n",
"        title_match = re.search(r'^title:\\s*\"([^\"]+)\"', content, re.MULTILINE)\n",
"        extracted_title = title_match.group(1) if title_match else extract_title(content)  # Use extract_title function as fallback\n",
"\n",
"        # Skip very small or extremely large files (token count is measured on the raw file text)\n",
"        if token_count < 100 or token_count > 200_000:\n",
"            print(\n",
"                f\"Skipping {file_path} due to token count {token_count}\"\n",
"            )\n",
"            continue\n",
"\n",
"        # Strip both headers in sequence: the copyright-stripped text is fed into the\n",
"        # front-matter cleaner. Passing the raw `content` to the second call would\n",
"        # silently discard the result of the first one.\n",
"        cleaned_content = remove_copyright_header(content)\n",
"        cleaned_content = remove_url_and_title_header(cleaned_content)\n",
"\n",
"        json_object = {\n",
"            \"tokens\": token_count,\n",
"            # Deterministic id: uuid5 over the cleaned text, so identical cleaned content yields the same doc_id\n",
"            \"doc_id\": str(uuid.uuid5(uuid.NAMESPACE_DNS, cleaned_content)),\n",
"            \"name\": (extracted_title if extracted_title else file_name),\n",
"            \"url\": extracted_url,\n",
"            # True only for documents of at most 8000 tokens\n",
"            \"retrieve_doc\": (token_count <= 8000),\n",
"            \"source\": folder_name,\n",
"            \"content\": cleaned_content,\n",
"        }\n",
"        jsonl_data.append(json_object)\n",
"    return jsonl_data\n",
"\n",
"\n",
"\n",
"# Iterate through each folder in the extracted directory and write one JSONL file per folder\n",
"for folder_name in os.listdir(extract_dir):\n",
"    folder_path = os.path.join(extract_dir, folder_name)\n",
"\n",
"    # Check if it's a directory\n",
"    if os.path.isdir(folder_path):\n",
"        jsonl_filename = f\"{folder_name}.jsonl\"\n",
"        jsonl_filepath = os.path.join(output_dir, jsonl_filename)\n",
"\n",
"        # Explicit encoding keeps the output independent of the platform default,\n",
"        # matching the utf-8 used when the source files are read.\n",
"        with open(jsonl_filepath, 'w', encoding='utf-8') as outfile:\n",
"            # Convert every file in the current folder, then emit one JSON object per line\n",
"            json_data = process_files(folder_name, folder_path, os.listdir(folder_path))\n",
"            for json_object in json_data:\n",
"                json_str = json.dumps(json_object)\n",
"                outfile.write(json_str + '\\n')\n",
"\n",
"        print(f\"Created JSONL file: {jsonl_filepath}\")"
], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": 
"Z2Mn64dN5It6", "outputId": "244b2d68-97e8-4fba-aae1-3a90b31b86aa" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2658_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22_t5_s_gxcuf89792_rss_Community_%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_b_why-microsoft-store_icid=footer_why-msft-store_7102020_%22.md due to token count 79\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___portal.office.com_landing_%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_b_education_%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___sharegate.com_microsoft-migration_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___bit.ly_SVS17CHI_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.avepoint.com_events_webinar_office-365-delegating-administration_%22.md due to token count 69\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___jmp.sh_s_jQ7HKR3DamYmG0AuwYLM_%22.md due to token count 70\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___openssl-library.org_news_secadv_20250211.txt_%22.md due to token count 63\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___choice.microsoft.com_%22.md due to token count 55\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.facebook.com_share.php_u=page.url&t=page-name_%22.md due to token 
count 67\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___account.microsoft.com_orders_%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_linkid=2196227%5C%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__www.linkedin.com_sharing_share-offsite_url=%7Bpage.url%7D%5C%22.md due to token count 72\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_en-in_fasttrack_data-migration_%22.md due to token count 62\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-laptop-go-2_8PGLPV76MJHN_%22.md due to token count 73\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_p_LinkID=824764&clcid=0x409%5C%22.md due to token count 73\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2656_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22javascript_void(0)_%22.md due to token count 25\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___cmd.ms__%22.md due to token count 55\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_linkid=2139749%5C%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___docs.microsoft.com_en-us_powershell_module_microsoft.graph.identity.directorymanagement_update-mgorganizationbranding_view=graph-powershell-beta_%22.md due to token count 82\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_b_business-consultation_tab=educationconsultation&icid=CNavfooter_educationconsultation_%22.md due to token count 80\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2662_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___appsource.microsoft.com_en-us__%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_sitemap1.aspx_%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2655_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___aka.ms_microsoftpurview_%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_diversity__%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_microsoft-365_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-laptop-5_8XN49V61S1BN_%22.md due to token count 74\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___support.microsoft.com__%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.reddit.com_submit_url=page.url&title=page-name_%22.md due to token count 67\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_en-us_office_dev_add-ins_develop_automatically-open-a-task-pane-with-a-document_%22.md due to token count 74\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2661_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_security_%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_en-us_microsoftsearch_configure-connector_%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___cloudpartners.transform.microsoft.com_resources_fasttrack_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___news.microsoft.com__%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_microsoft-365_business__%22.md due to token count 63\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_investor_default.aspx_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-laptop-studio_8SRDF62SWKPF_%22.md due to token count 72\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_docs__%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_sustainability__%22.md due to token count 30\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___developer.microsoft.com_en-us__%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_security_blog_2022_04_19_the-future-of-compliance-and-data-governance-is-here-introducing-microsoft-purview__%22.md due to token count 56\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_education_buy-license_microsoft365_%22.md due to token count 65\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___privacy.microsoft.com_en-us_%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___help.bittitan.com_hc_en-us_articles_360011170593-Google-Drive-to-OneDrive-for-Business-v2-Migration-Guide_%22.md due to token count 83\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.nucleustechnologies.com_office-365-migration__%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_industry_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___azure.microsoft.com_en-us_free_students__%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___answers.microsoft.com_%22.md due to token count 55\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___techcommunity.microsoft.com_t5_FastTrack-Blog_Microsoft-Ignite-2018-recap-and-join-us-for-Microsoft-Ignite-The_ba-p_269474_%22.md due to token count 90\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___powerplatform.microsoft.com_en-us__%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___bsky.app_intent_compose_text=page-name%21%20%F0%9F%A6%8B%0Apage.url_%22.md due to token count 85\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_accessibility_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___visualstudio.microsoft.com__%22.md due to token count 57\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___support.office.com_office-training-center_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_b_business_icid=CNavBusinessStore_%22.md due to token count 68\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___twitter.com_hashtag_PowerApps_src=hashtag_click_%22.md due to token count 67\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-pro-9_93VKD8NP4FVK_%22.md due to token count 72\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___login.microsoftonline.com_443_common_oauth2_v2.0_token_%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___aka.ms_o365update-youtube_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com__%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___twitter.com_share_text=page-name&url=page.url_%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2657_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___fasttrack.microsoft.com_%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-studio-2plus_8VLFQC3597K4_%22.md due to token count 74\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___azure.microsoft.com_en-us__%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_39802_%22.md due to token count 
60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_LinkId=521839%5C%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___education.microsoft.com__%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___techcommunity.microsoft.com__%22.md due to token count 27\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_education_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22javascript_manageConsent();_%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___fto365dev.blob.core.windows.net_media_Default_Resources_SharePoint%202013%20Migration%20Offer%20FAQ_Public_September2016.pdf_%22.md due to token count 86\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___azuremarketplace.microsoft.com_en-us__%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_education_devices_overview_%22.md due to token count 62\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___Server_FQDN_certsrv_mscep_mscep.dll_%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___answers.microsoft.com_en-us_msoffice_forum_all_migrating-email-from-one-office365-account-to_da67871d-10b3-40d3-9108-cbd0a020c917_%22.md due to token count 96\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___docs.microsoft.com_en-us_sharepointmigration_migrating-content-to-onedrive-for-business_%22.md due to token count 70\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_en-us_microsoft-365_enterprise_cross-tenant-mailbox-migration_view=o365-worldwide_%22.md due to token count 78\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_about_%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___azure.microsoft.com_updates_%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___coolesicht.de_likesgivento_fdp.svg_%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_workshops-training-and-events_icid=vl_uf_932020_%22.md due to token count 74\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_d_surface-duo-2_9408KGXP4XJL_%22.md due to token count 73\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___XXXXXX.sharepoint.com_sites_apps_%22.md due to token count 59\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___fto365dev.blob.core.windows.net_media_Default_DocResources_en-us_Office_365_Adoption_Guide.pdf_%22.md due to token count 77\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_store_b_payment-financing-options_icid=footer_financing_vcc_%22.md due to token count 73\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_download_%22.md due to token count 58\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_microsoft-cloud_%22.md due to token count 60\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22http___fasttrack.microsoft.com_office_envision_productivitylibrary_%22.md due to token count 63\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___account.microsoft.com__%22.md due to token count 56\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___docs.microsoft.com_en-us_answers_products_azure_product=all_%22.md due to token count 66\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___docs.microsoft.com_en-us_microsoft-365_security_office-365-security_step-by-step-guides_step-by-step-guide-overview_view=o365-worldwide_%22.md due to token count 87\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22mailto__body=page.url_%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2660_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_39804_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_2663_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___technet.microsoft.com_en-us_library_mt651701.aspx_%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_windows_windows-11-apps_%22.md due to token count 61\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___dynamics.microsoft.com_en-us__%22.md due to token count 29\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__setup.cloud.microsoft_Q=TechCommunityBlog%5C%22.md due to token count 64\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https___techcommunity.microsoft.com_t5_fasttrack-blog_deploying-microsoft-365-just-got-easier-introducing-advanced_ba-p_3635421_%22.md due to token count 85\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___techcommunity.microsoft.com_t5_microsoft-365-blog_announcing-the-preview-of-the-software-updates-page-in-the_ba-p_3465361_%22.md due to token count 84\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_LinkID=206977%5C%22.md due to token count 35\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___aka.ms_yourcaliforniaprivacychoices_%22.md due to token count 63\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22http___bit.ly_O365AUGChiMU_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___careers.microsoft.com__%22.md due to token count 27\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___myignite.microsoft.com_videos_39803_%22.md due to token count 60\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___learn.microsoft.com_en-us_microsoftsearch_mssql-connector_%22.md due to token count 64\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___support.microsoft.com_contactus_%22.md due to token count 57\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_education_products_teams_%22.md due to token count 62\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___www.microsoft.com_en-us_microsoft-teams_group-chat-software_%22.md due to token count 65\n", "Skipping ./data/extracted/tech-community/techcommunity.microsoft.com_%22https___XXXXXX.sharepoint.com_sites_fasttrack_%22.md due to token count 60\n", "Skipping 
./data/extracted/tech-community/techcommunity.microsoft.com_%22https__go.microsoft.com_fwlink_linkid=2196228%5C%22.md due to token count 66\n", "Created JSONL file: ./data/jsonl_output/tech-community.jsonl\n", "Created JSONL file: ./data/jsonl_output/azure-architecture.jsonl\n", "Skipping ./data/extracted/azure-updates/azure.microsoft.com_en-us_pricing_details_databricks_.md due to token count 268457\n", "Created JSONL file: ./data/jsonl_output/azure-updates.jsonl\n", "Skipping ./data/extracted/microsoft-learn/learn.microsoft.com_en-us_azure_governance_policy_samples_built-in-policies.md due to token count 338431\n", "Created JSONL file: ./data/jsonl_output/microsoft-learn.jsonl\n", "Skipping ./data/extracted/github-samples/github.com_Azure-Samples_graphrag-accelerator_blob_157e7af9b8cf29b8ea50019b9aff6bd6f6f1ba0b_backend_poetry.lock.md due to token count 204958\n", "Skipping ./data/extracted/github-samples/github.com_Azure-Samples_openai_blob_21f3d94334c823e42d7adf8578cebc0674363fe1_Solution_Accelerators_Advanced_RAG_src_session_manager_nszrxvuw.component-detection-pip-report.json.md due to token count 388353\n", "Skipping ./data/extracted/github-samples/github.com_Azure-Samples_openai_blob_21f3d94334c823e42d7adf8578cebc0674363fe1_Solution_Accelerators_Advanced_RAG_src_orchestrator_rag_0kanp4rh.component-detection-pip-report.json.md due to token count 423339\n", "Created JSONL file: ./data/jsonl_output/github-samples.jsonl\n" ] } ] }, { "cell_type": "code", "source": [ "# prompt: upload to hugging face\n", "\n", "# List files in the output directory\n", "output_files = [f for f in os.listdir(output_dir) if f.endswith('.jsonl')]\n", "\n", "# Upload each JSONL file to Hugging Face Datasets\n", "for file_name in output_files:\n", " file_path = os.path.join(output_dir, file_name)\n", " try:\n", " api.upload_file(\n", " path_or_fileobj=file_path,\n", " path_in_repo=file_name,\n", " repo_id=\"vicpada/AzureResources\", # Replace with your repo ID\n", " 
repo_type=\"dataset\",\n", " commit_message=f\"Add {file_name}\"\n", " )\n", " print(f\"Successfully uploaded {file_name}\")\n", " except Exception as e:\n", " print(f\"Error uploading {file_name}: {e}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 264, "referenced_widgets": [ "268c8a22fb2a4d2eab9c3e25de984221", "2d2b4736f11c433585b752139dd70dc9", "80d59424f44a4b199008e0d16343f4b0", "ed3db2b25f0e45a7a9cdf4405b1a87a1", "df1e6015d489417485e3a28bbbd54728", "f2c35ce7b4264cfa80ace120c2a8c2f9", "25469448f7d6428ca97a96f9c9f156b7", "6586ec5c75454671b85bd4659fd9a40e", "11273ae865b44f2b8d3a4363b3fa1178", "e36a5f615ae948148aa1222b0ddc514b", "e64d5d3198634727a6a9f3634fb4afb8", "079339c01ce24e8ab8a61d24b36ee4e8", "888a7e6f633746be94943016dc9f7c56", "b41df158b85c4222ba2d65279337105d", "3bc4979197e94257a83d7f0deeafa43a", "2ec303b4d09d41ed83102edec73026f0", "6b6b1bdbd8f64ad4b458bede86f7f3c6", "11f49e72f98c4d6aa69247d0e1661a2b", "bf34ba8bf7bc4dbcb7152ecb22229c78", "06f2266f798544adb127055305f1671f", "328ccb5aaad64381acf906d3f2f39434", "1a556d4a74b44803a88eb88b01f78abf", "e954c157384742568aeb1783495a652a", "6265a7718d4d472f895fa8a55de6facf", "6d550f52e424492f8928886802018388", "ae40a367b9af4d9f8fdd96cbdad2cf27", "e927f83749c6446d8734bb2de88e429b", "cebf15f03d694f7ca7a38febf76cb054", "cc5663d199724c83b893e3ce965d466d", "f01c1e76140e4797bc79e0880db73304", "34ff77d08c3d43b98d2d3855543a89dd", "f08ea4973a31409faf9c07751b5d5dd3", "790fb4ae984446d9bdeec2a2876807ff", "185ded41057340a08a5bccc551798f1a", "5d1143e67464431aa5a96d4c01ae9b73", "d29703f8bf6d49188cb5a2bd3a5629eb", "bdc511cd9dcf44f09af118fa11b9c5c3", "ed8fadcdcde4412b98c68e8d1924eaf0", "9e3eca47a25f43e69bcf0c9fd3962cae", "bac81645f79848c8a7fc30e21ba6fcce", "2f03e86202bd4fc9b2d13e67ee0c7447", "eb2f160c34814ff1b7aa8a1ad3d42edb", "3c08de811fbb422eb70ea751acb2e13d", "1626f1da378045399f650e6fb1ba0429", "f2aef948c3e248bca2a6b4a84d8534b5", "e8e7a3c6cac44588a44ec5f4bd3c2771", 
"8cd41c5175c546b0a576d7f1f07b31f0", "fdd2a5d14b5045e6b3ff9f585c12c1b1", "c74f16c144274bfc81f5484c9d5abf84", "6663b46404c447ec82eb7b755f46e3f4", "dcdc3d1d68f14c34931e1d257590ca70", "6567ebb2bce04053812751d9bb0db2da", "bbc373b2b34a447c85bc4983b4d62390", "93590d191413459e878207a0176393be", "0a674086b9244ba6ac5400b10c3b7a69" ] }, "id": "bfPafnarDngV", "outputId": "b9f87732-701f-4508-9ebf-a52e0a4919b0" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "microsoft-learn.jsonl: 0%| | 0.00/270M [00:00