Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 215 additions & 0 deletions hw5/.ipynb_checkpoints/regex-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1. Find all ftp links and write them to the **ftps** file"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the reference list; every element of `lines` keeps its trailing newline.\n",
"with open('references', 'r') as f_in:\n",
"    lines = list(f_in)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Glue the lines back into one string so a single regex pass covers the whole file.\n",
"text = ''.join(lines)\n",
"# A link starts at the literal 'ftp.' and runs until a ';', a tab or a newline.\n",
"pattern = re.compile(r'ftp\\.[^;\\t\\n]+')\n",
"ftp_links = [match.group() for match in pattern.finditer(text)]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Write one link per line to the 'ftps' output file.\n",
"with open('ftps', 'w') as f_out:\n",
"    for ftp_link in ftp_links:\n",
"        # The write must be nested inside the loop body, one level deeper than `for`.\n",
"        f_out.write(f'{ftp_link}\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2. Extract all numbers from the **2430 A.D.** story"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Load the story line by line (newlines preserved) for the tasks below.\n",
"with open('2430AD', 'r') as f_in:\n",
"    lines = list(f_in)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2430 1969 2430 2430 57 57 1970 3 68 35 460 2430\n"
]
}
],
"source": [
"# Rebuild the full story text; the following cells reuse `text`.\n",
"text = ''.join(lines)\n",
"# Every maximal run of ASCII digits counts as one number.\n",
"pattern = re.compile(r'[0-9]+')\n",
"all_numbers = [match.group() for match in pattern.finditer(text)]\n",
"print(' '.join(all_numbers))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 3. Extract all words starting with **A** or **a**."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"and and are all and an Alvarez An adjustment along as and Alvarez and athletic and almost an and avoided and and apartment Alvarez All And at against and almost And another as about an and And at all always all and asked and and Alvarez ask as as are and always all and asking accede Alvarez amount and Alvarez and and are all and And and all Alvarez as as amount And asked All All Alvarez against against are and and abandoned all are Alvarez and and Alvarez and and and at and all and as and advances are and art and and advance and animal animal all all animal Alvarez amphibians at all and animals And and are are are adjusted are all an and and after attempt and Alvarez and animals all another already and and are All at all and all acted act And Alvarez Alvarez audible against are and and and And And and amused and and and and and and and And and and and and and and And all as and all and and animals appropriate and and and and also and after all all and all at and ASSET again and at and agree and about about ashamed and as added as and and animal above\n"
]
}
],
"source": [
"# Use a word boundary instead of surrounding whitespace: this also catches the\n",
"# one-letter words 'a'/'A', words next to punctuation, words at the very\n",
"# start or end of the text, and two matching words in a row.\n",
"pattern = re.compile(r'\\b[Aa]\\w*')\n",
"words = pattern.findall(text)\n",
"print(*words)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4. Extract all sentences ending with **!**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Yes!\n",
"Literally!\n",
"There was once a time!\n",
"Centuries ago!\n",
"Think, Cranwitz!\n",
"If we succeed!\n"
]
}
],
"source": [
"# A match starts at a capital letter, contains no newline or sentence-ending\n",
"# punctuation in between, and stops at the first '!'.\n",
"pattern = re.compile('[A-Z][^\\n.!?]+[!]')\n",
"proposals = [match.group() for match in pattern.finditer(text)]\n",
"for sentence in proposals:\n",
"    print(sentence)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5. Plot the length distribution of unique words longer than 1 character (case-insensitive)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Words are runs of ASCII letters delimited by word boundaries, so the first\n",
"# word of the text and words following quotes or dashes are matched as well.\n",
"# Raw string: '\\b' must reach the regex engine as a word boundary, not a backspace.\n",
"pattern = re.compile(r'\\b[A-Za-z]+\\b')\n",
"all_words = pattern.findall(text)\n",
"# Lower-case before deduplicating so that case does not matter.\n",
"all_words_lower = [word.lower() for word in all_words]\n",
"# The task statement only asks for words longer than one letter.\n",
"unique_words = {word for word in all_words_lower if len(word) > 1}\n",
"length_ls = [len(word) for word in unique_words]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtkAAAJSCAYAAAAWOtmFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3de7SkZ10n+u9PGoKkUS6BNiTxhCOxR2Tk1gdBvHRADheR4MzgEI8SASe6BGHOcY7CMEsbGI5REBzXKBoBCQhE5LIIiEgIbDiegQDhEi4xEu5Neojc06CBwO/8UdWwbbrTu7qfqtq1+/NZa69d9bxPvfWrZ9Xa+ebp533e6u4AAADjfMeyCwAAgK1GyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBti27gNFOOumkPv3005ddxqb35S9/OSeeeOKyy9iSjO38GNv5MbbzY2znx9jOj7HdmMsuu+wz3X2bQx3bciH79NNPzzvf+c5ll7Hpra2tZffu3csuY0sytvNjbOfH2M6PsZ0fYzs/xnZjqurjhztmuQgAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBg25ZdAKyUPXuO3Gfnzo31WxVb6bMAwIIsdCa7qm5aVW+vqvdW1Qeq6snT9ttX1aVV9aGq+suqusm0/YTp86umx09fZL0AAHA0Fr1c5Lok9+nuOye5S5IHVNU9k/xukmd19xlJPp/k0dP+j07y+e6+Q5JnTfsBAMCmttCQ3RP7p09vPP3pJPdJ8rJp+wVJHjp9fNb0eabH71tVtaByAQDgqCz8wsequlFVvSfJNUkuTvLhJF/o7uunXfYmOWX6+JQkn0yS6fEvJrn1YisGAIDZVHcv542rbpHklUl+K8mfT5eEpKpOS/La7v7XVfWBJPfv7r3TYx9Oco/u/uxB5zo3yblJsmPHjrtfeOGFC/wkq2n//v3Zvn37sstYPfv2HbHL/hNOyPbrrltAMQty8snLruCbfG/nx9jOj7GdH2M7P8Z2Y84888zLunvXoY4tbXeR7v5CVa0luWeSW1TVtuls9alJrp5225vktCR7q2pbku9O8rlDnOv8JOcnya5du3r37t3z/wArbm1tLcbpKGxgp421nTuz+8or51/Lopx99rIr+Cbf2/kxtvNjbOfH2M6PsT12i95d5DbTGexU1Xcm+ckkVyR5U5J/N+12TpJXTR9fNH2e6fE39rKm3gEAYIMWPZN9cpILqupGmQT8l3b3a6rqg0kurKr/muTdSZ477f/cJC+sqqsymcF++ILrBQCAmS00ZHf35Unueoj2jyS5xyHa/znJwxZQGgAADOO26gAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgw
nZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADDYQkN2VZ1WVW+qqiuq6gNV9fhp+56q+lRVvWf686B1r3liVV1VVVdW1f0XWS8AAByNbQt+v+uT/Hp3v6uqbp7ksqq6eHrsWd39jPWdq+qOSR6e5AeT3C7JG6rq+7v76wutGgAAZrDQmezu3tfd75o+vjbJFUlOuYGXnJXkwu6+rrs/muSqJPeYf6UAAHD0lrYmu6pOT3LXJJdOmx5bVZdX1fOq6pbTtlOSfHLdy/bmhkM5AAAsXXX34t+0anuSNyd5Wne/oqp2JPlMkk7y1CQnd/ejquqPkry1u/9i+rrnJnltd7/8oPOdm+TcJNmxY8fdL7zwwgV+mtW0f//+bN++fdllrJ59+47YZf8JJ2T7ddctoJgFOfnkZVfwTb6382Ns58fYzo+xnR9juzFnnnnmZd2961DHFr0mO1V14yQvT/Ki7n5FknT3p9cd/7Mkr5k+3ZvktHUvPzXJ1Qefs7vPT3J+kuzatat37949l9q3krW1tRino7BnzxG7rO3cmd1XXjn/Whbl7LOXXcE3+d7Oj7GdH2M7P8Z2foztsVv07iKV5LlJrujuZ65rXz9V9jNJ3j99fFGSh1fVCVV1+yRnJHn7ouoFAICjseiZ7Hsn+YUk76uq90zb/nOSs6vqLpksF/lYkl9Oku7+QFW9NMkHM9mZ5DF2FgEAYLNbaMju7r9LUoc49NobeM3TkjxtbkUBAMBg7vgIAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMNhCQ3ZVnVZVb6qqK6rqA1X1+Gn7rarq4qr60PT3LaftVVV/WFVXVdXlVXW3RdYLAABHY9Ez2dcn+fXu/oEk90zymKq6Y5InJLmku89Icsn0eZI8MMkZ059zkzx7wfUCAMDMFhqyu3tfd79r+vjaJFckOSXJWUkumHa7IMlDp4/PSvKCnnhbkltU1cmLrBkAAGa1tDXZVXV6krsmuTTJju7el0yCeJLbTrudkuST6162d9oGAACbVnX34t+0anuSNyd5Wne/oqq+0N23WHf88919y6r66yS/091/N22/JMlvdPdlB53v3EyWk2THjh13v/DCCxf2WVbV/v37s3379mWXsXr27Ttil/0nnJDt1123gGIW5OTN849HvrfzY2znx9jOj7GdH2O7MWeeeeZl3b3rUMe2LbqYqrpxkpcneVF3v2La/OmqOrm7902Xg1wzbd+b5LR1Lz81ydUHn7O7z09yfpLs2rWrd+/ePa/yt4y1tbUYp6OwZ88Ru6zt3JndV145/1oW5eyzl13BN/nezo+xnR9jOz/Gdn6M7bFb9O4ileS5Sa7o7meuO3RRknOmj89J8qp17Y+Y7jJyzyRfPLCsBAAANqtFz2TfO8kvJHlfVb1n2vafk5yX5KVV9egkn0jysOmx1yZ5UJKrknwlySMXWy4AAM
xuoSF7ura6DnP4vofo30keM9eiAABgMHd8BACAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBNhyyq+o/VNWJ8ywGAAC2gllmsv8kydVV9UdV9UPzKggAAFbdLCH7+5L8cZJ/k+TdVfXWqjqnqm46n9IAAGA1bThkd/fHuvuJSU5L8vAkX0nyvExmt59VVT8wpxoBAGClzHzhY3df391/1d33TbIzyeVJHpfk/VX15qr6qdFFAgDAKjmq3UWq6uZV9atJXp7kx5O8O8mTkmxLclFVPWVciQAAsFpmCtlVtauq/izJ1UmekeQ9Se7V3bu6+7zuvneSPUkeM7xSAABYEbNs4XdZkkuTnJnkKUlO7e5zuvvSg7penOSW40oEAIDVsm2Gvlcn+S9JXtfdfQP93pXk9sdUFQAArLANh+zu/ukN9vtqko8fdUUAALDiZlku8siq2nOYY3uq6pxhVQEAwAqb5cLHxyf57GGOXZPkPx57OQAAsPpmCdl3SPKBwxy7IpM7QgIAwHFvlpB9fZKTDnPsNgNqAQCALWGWkP32JL9ymGO/kuQdx14OAACsvlm28HtakjdU1aVJnpPkU0lOSfJLSe6W5H7jywMAgNUzyxZ+b66qf5fkD5L86bpDH0vyb7t7bWxpAACwmmaZyU53vyrJq6pqZ5JbJ/lMd//DXCoDAIAVNVPIPqC7rxxdCAAAbBUzheyq+q4kD0ryvUluetDh7u6njioMAABW1YZDdlXdO8mrk9ziMF06iZANAMBxb5Yt/P4gk4sc/7ckN+3u7zjo50ZzqRAAAFbMLMtFfiDJz3b3ZfMqBgAAtoJZZrI/keSEeRUCAABbxSwh+8lJnjC9+BEAADiMWZaLPDjJjiQfraq3JvncQce7u88ZVhkAAKyoWUL2j2ayg8iXkvzgIY73kIoAAGDFzXJb9dvPsxAAANgqZlmTDQAAbMBMIbuqTqyqx1XVy6rqTVV1xrT94VX1r+ZTIgAArJZZ7vh4WpK1JKcm+fskd0py8+nhM5P8ZJJfGlwfAACsnFlmsn8/yXVJzkhy9yS17tibk/z4wLoAAGBlzbK7yP2SnNvdn6iqg2+h/qkkp4wrCwAAVtcsM9k3SXLtYY59d5KvHXs5AACw+mYJ2Zcn+beHOfbAJJcdezkAALD6Zlku8vQkL6uqJHnxtO2OVXVWkkcnecjg2gBYlH37kj17ll3FYh1vnxdYqFluRvOKqvrVJOcledS0+QWZLCF5bHe/bg71AQDAypllJjvd/SdV9cIk90py2ySfTfI/uvtwa7UBAOC4M1PITpLu/nKSN8yhFgAA2BJmuRnNEffB7u63HFs5AACw+maZyV5L0kfoc/D+2QAAcNyZJWSfeYi2Wyd5cJKfSPLYIRUBAMCKm2V3kTcf5tArqupZSX46yd8MqQoAAFbYLDejuSF/neRnB50LAABW2qiQvTPJNwadCwAAVtosu4s84hDNN0lyp0zu+PiKUUUBAMAqm+XCx+cfpv26JH+Z5PHHXA0AAGwBs4Ts2x+i7Z+7+9OjigEAgK1glt1FPj7PQgAAYKsYdeEjAAAwteGQXVXfqKqvb/Dn+sOc43lVdU1VvX9d256q+lRVvWf686B1x55YVVdV1ZVVdf9j+6gAALAYs6zJfmqSRyb5ziSvTvLpJN+TyR0fv5Lkz3Pk264/P8l/T/KCg9qf1d3PWN9QVXdM8vAkP5jkdkneUFXf391fn6FmAABYuFlC9teSfDzJ/bv7Kwcaq+rEJH+b5Gvd/bQbOkF3v6WqTt/g+52V5MLuvi7JR6vqqiT3SPLWGWoGAICFm2VN9i8nefr6gJ0k3f3lJM9I8ivHUM
djq+ry6XKSW07bTknyyXV99k7bAABgU5tlJvukTG4+cyg3SXLro6zh2ZksRenp799P8qgkdYi+h1yOUlXnJjk3SXbs2JG1tbWjLOX4sX//fuN0NHbuPGKX/SeckLUN9FsZm+h74ns7P1vue7sRC/ou+d7Oj7GdH2N77GYJ2e9M8uSqemt3f+pAY1WdkmRPknccTQHr99muqj9L8prp071JTlvX9dQkVx/mHOcnOT9Jdu3a1bt37z6aUo4ra2trMU5HYc+eI3ZZ27kzu6+8cv61LMrZZy+7gm/yvZ2ftZe8ZGt9bzdiQd9t39v5MbbzY2yP3SzLRR6XyQWIH66qtar6y6paS/LhTC6APKo7PlbVyeue/kySAzuPXJTk4VV1QlXdPskZSd5+NO8BAACLNMvNaN5dVXdI8n8luWeSf51kXybrsZ/V3Z890jmq6iVJdic5qar2JvntJLur6i6ZLAX5WCZrv9PdH6iqlyb5YJLrkzzGziIAAKyCWZaLZBqkn3S0b9bdh/q3uefeQP+nJbnBHUsAAGCzmSlkJ0lVnZTJTPatk7y6uz9XVTdN8tXu/sboAgEAYNXMcsfHqqqnZ3JB4kVJnpfk9OnhV+UYZrgBAGArmeXCxycmeWySpyT54fzLLfZencmdHwEA4Lg3y3KRX0rylO7+naq60UHHrkryfePKAgCA1TXLTPYpSd52mGNfTXLisZcDAACrb5aQ/akkdzrMsTsn+eixlwMAAKtvlpD9V0l+q6ruva6tq+r7k/x6kguHVgYAACtqlpC9J8nfJ3lLkg9N2/4qyfumz88bWhkAAKyoWe74+E9VtTvJzyW5fyYXO342yVOTvKi7r59LhQAAsGI2FLKr6sZJHpTk8u5+YZIXzrUqAABYYRtaLtLdX0vy0nzr5jMAAMBhzLIm+yNJbjuvQgAAYKuY5WY0v5fkSVX1xu7+x3kVBGwye/Ysu4Jv2blzMfVsps8MwEqaJWTfJ8mtkny0qt6WZF+SXne8u/uckcUBAMAqmiVk/2iSryX5x0xuoX7wbdT7214BAADHoRsM2VX1uSQ/2d3vSrKW5Cnd7c6OAABwA4504eOJSU6YPn5EktvMtxwAAFh9R1ou8vEk/6GqTkhSSe5aVTc9XOfufsvI4gAAYBUdKWSfl+RPk5yTyZrrPz5Mv5oev9G40gAAYDXdYMju7udV1d8k+f4kb0ryuCRXLKIwAABYVUfcXaS79yXZV1UXJPlrFz4CAMAN2/AWft39yHkWAgAAW8Ust1UHAAA2QMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgME2fFt1gOPGnj3LrmDxdu5cdgUAW4qZbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGGyhIbuqnldV11TV+9e13aqqLq6qD01/33LaXlX1h1V1VVVdXlV3W2StAABwtBY9k/38JA84qO0JSS7p7jOSXDJ9niQPTHLG9OfcJM9eUI0AAHBMFhqyu/stST53UPNZSS6YPr4gyUPXtb+gJ96W5BZVdfJiKgUAgKO3GdZk7+jufUky/X3bafspST65rt/eaRsAAGxq25ZdwA2oQ7T1ITtWnZvJkpLs2LEja2trcyxra9i/f79xOho7dx6xy/4TTsjaBvoxO2M7P8fl2C7ob6C/t/NjbOfH2B67zRCyP11VJ3f3vulykGum7XuTnL
au36lJrj7UCbr7/CTnJ8muXbt69+7dcyx3a1hbW4txOgp79hyxy9rOndl95ZXzr+U4ZGzn57gc27PPXsjb+Hs7P8Z2foztsdsMy0UuSnLO9PE5SV61rv0R011G7pnkiweWlQAAwGa20JnsqnpJkt1JTqqqvUl+O8l5SV5aVY9O8okkD5t2f22SByW5KslXkjxykbUCAMDRWmjI7u7D/dvcfQ/Rt5M8Zr4VAQDAeJthuQgAAGwpQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADCZkAwDAYNuWXQArbs+eZVcAALDpmMkGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGGzbsgs4oKo+luTaJF9Pcn1376qqWyX5yySnJ/lYkp/t7s8vq0YAANiIzTaTfWZ336W7d02fPyHJJd19RpJLps8BAGBT22wh+2BnJblg+viCJA9dYi0AALAhmylkd5LXV9VlVXXutG1Hd+9Lkunv2y6tOgAA2KDq7mXXkCSpqtt199VVddskFyf5tSQXdfct1vX5fHff8hCvPTfJuUmyY8eOu1944YWLKntl7d+/P9u3bz/2E+3bd+zn2GL2n3BCtl933bLL2JKM7fwcl2N78skLeZthf2/5NsZ2foztxpx55pmXrVvm/C9smgsfu/vq6e9rquqVSe6R5NNVdXJ376uqk5Ncc5jXnp/k/CTZtWtX7969e0FVr661tbUMGac9e479HFvM2s6d2X3llcsuY0sytvNzXI7t2Wcv5G2G/b3l2xjb+TG2x25TLBepqhOr6uYHHif535O8P8lFSc6ZdjsnyauWUyEAAGzcZpnJ3pHklVWVTGp6cXe/rqrekeSlVfXoJJ9I8rAl1ggAABuyKUJ2d38kyZ0P0f7ZJPddfEUAAHD0NsVyEQAA2EqEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwbYtuwAAWIo9exbzPjt3Lu69jmSz1AHHATPZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAg9nCDwCOF1ttC7+NbI+41T4zK8NMNgAADCZkAwDAYEI2AAAMJmQDAMBgQjYAAAwmZAMAwGBCNgAADGaf7JFWaS/OjewtCgDAUTGTDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAIMJ2QAAMJiQDQAAgwnZAAAwmJANAACDCdkAADCYkA0AAINtW3YBAAAMsmfPmPPs3DnuXPO2Ses0kw0AAIMJ2QAAMJiQDQAAgw
nZAAAw2Epc+FhVD0jy35LcKMlzuvu8JZcEAKyCTXpRHFvfpp/JrqobJfmjJA9McsckZ1fVHZdbFQAAHN6mD9lJ7pHkqu7+SHd/NcmFSc5ack0AAHBYqxCyT0nyyXXP907bAABgU1qFNdl1iLb+Fx2qzk1y7vTp/qq6cu5Vrb6Tknxm2UVsUcZ2fozt/Bjb+TG282Ns52d1xvbJT17mu/8vhzuwCiF7b5LT1j0/NcnV6zt09/lJzl9kUauuqt7Z3buWXcdWZGznx9jOj7GdH2M7P8Z2foztsVuF5SLvSHJGVd2+qm6S5OFJLlpyTQAAcFibfia7u6+vqscm+dtMtvB7Xnd/YMllAQDAYW36kJ0k3f3aJK9ddh1bjOU182Ns58fYzo+xnR9jOz/Gdn6M7TGq7j5yLwAAYMNWYU02AACsFCH7OFNVp1XVm6rqiqr6QFU9ftk1bSVVdaOqendVvWbZtWw1VXWLqnpZVf399Pt7r2XXtFVU1f85/Xvw/qp6SVXddNk1raqqel5VXVNV71/XdququriqPjT9fctl1riqDjO2T5/+Tbi8ql5ZVbdYZo2r6lBju+7Yf6qqrqqTllHbKhOyjz/XJ/n17v6BJPdM8hi3qR/q8UmuWHYRW9R/S/K67v5XSe4c4zxEVZ2S5HFJdnX3nTK5wPzhy61qpT0/yQMOantCkku6+4wkl0yfM7vn59vH9uIkd+ruH0ryD0meuOiitojn59vHNlV1WpL7JfnEogvaCoTs40x37+vud00fX5tJUHEHzQGq6tQkP5XkOcuuZaupqu9K8uNJnpsk3f3V7v7CcqvaUrYl+c6q2pbkZjnoXgRsXHe/JcnnDmo+K8kF08cXJHnoQovaIg41tt39+u6+fvr0bZncS4MZHeZ7myTPSvIbOegmgGyMkH0cq6rTk9w1yaXLrWTL+INM/hh9Y9mFbEH/a5J/TPLn0+U4z6mqE5dd1FbQ3Z9K8oxMZqr2Jflid79+uVVtOTu6e18ymehIctsl17NVPSrJ3yy7iK2iqh6S5FPd/d5l17KqhOzjVFVtT/LyJP+xu7+07HpWXVU9OMk13X3ZsmvZorYluVuSZ3f3XZN8Of7JfYjp+uCzktw+ye2SnFhVP7/cqtpxVk0AAAa9SURBVGA2VfWkTJZDvmjZtWwFVXWzJE9K8lvLrmWVCdnHoaq6cSYB+0Xd/Ypl17NF3DvJQ6rqY0kuTHKfqvqL5Za0pexNsre7D/yry8syCd0cu59M8tHu/sfu/lqSVyT5kSXXtNV8uqpOTpLp72uWXM+WUlXnJHlwkv+j7Us8yvdl8j/e753+d+3UJO+qqu9ZalUrRsg+zlRVZbKu9Yrufuay69kquvuJ3X1qd5+eyUVjb+xus4GDdPf/TPLJqto5bbpvkg8usaSt5BNJ7llVN5v+fbhvXFQ62kVJzpk+PifJq5ZYy5ZSVQ9I8ptJHtLdX1l2PVtFd7+vu2/b3adP/7u2N8ndpn+L2SAh+/hz7yS/kMlM63umPw9adlGwAb+W5EVVdXmSuyT5f5Zcz5Yw/deBlyV5V5L3ZfLfBXd6O0pV9ZIkb02ys6r2VtWjk5yX5H5V9aFMdmo4b5k1rqrDjO1/T3LzJBdP/3v2J0stckUdZmw5Ru74CAAAg5nJBgCAwYRsAAAYTMgGAIDBhGwAABhMyAYAgMGEbIAtqKpOr6quql88Qr89VbUptpmqql+sqkcdpr2r6g7LqAvgaAjZAGwWv5jk20I2wCoSsgFWVFWdsOwaADg0IRtggKraNV3S8KPr2n5t2vZf17WdMW170Lq2e1TVG6pqf1V9uaouqap7HHT+50/vxHavqvofVfVPSX5veuxmVfXHVfXZ6TkuSnLqMXyWbVX1xKr6+6q6rqqurqrfr6qbrutzYDnKL1fVU6pqX1V9oapeXVWnHnS+m1XVs6f1XVtVr6yqH1m/nKWq1pL8RJJ7T9t72rbeSVX1oqr60rSmP1xfE8BmImQDjPGuJF
9Icp91bfdJ8k+HaPt6kv83Sarqh5K8OcktM1ku8Ygk35XkzVV154Pe47uTXJjkJUkemOTF0/Y/TfJLSZ6Z5N8kuXLdsaPxF0n+y/QcP5Xkd5I8OsmLDtH3iUnukMkyj8cnudch+p0/Pf6MdfUd3OdXk7w7yeXTc9xr2rbeC5N8eHqOZyd5zPT9ATadbcsuAGAr6O5vVNVbkpyZ5ClV9R2ZzMw+O8njqmp7d++fHn9nd187felvJbkuyX27+wtJUlUXJ/lYkt/OJFAesD3Jz3f3qw40VNXOJD+X5Endfd60+fVVtT3Jr8z6Oarqx5L8+yTndPcLps1vqKrPJfmLqrpLd79n3Us+3t0/t+71t0ny9Kq6XXdfva6+J3T37027XVxVN0vya+vG74NV9aUk27r7bYcp78Xd/dvravrhJGdnMk4Am4qZbIBx3pTkXtMlDHdJcotMlnRcl+THpn12J3njutf8eJLXHAjYSdLdX0pyUSYhfb3rk7zmoLYfzuRv+UsPar/wKD/DA5J8NcnLp8tGtlXVtiSvX1fven990PP3TX9/77r6KslfHdTvZUdR26He63sP1RFg2cxkA4zzxiQnJPmRJHdN8t7u/nRV/V2SM6vqE0l2ZBLGD7hVkn2HONf/zGQJyXrXdPfXD2o7efr70we1H/x8o26b5CZJ9h/m+K0Pev65g55fN/19YK30gfquOajf0dR3qPdy8SewKQnZAOO8L8lnMll3fdd8a8b6jUl+NsknM5kl/v/WveZzSb7nEOf6nnx7qDzUftYHAvqOJB9Z175jlsLX+WySf863Zt4PdvWM5ztQ322TfHRd+9HWB7ASLBcBGKS7O5OLGO+XSUhdH7LvmuRnklza3V9Z97I3J/mpqrr5gYbp45+eHjuSS5N8I5MQv97Dj+YzJHldJrPQ393d7zzEz6wh+9JM/ufgYQe1H/w8mcxMf+fsJQNsPmayAcZ6Y5I/yrodRDLZeeRLmV4UeVD/pyZ5cJJLqup3Mwmkv5nkZofo+226+8qqenG+dbHlOzIJ+Q+64Vce9nxrVfWSJC+rqmcmeXsmIf706Tl/s7v/YYbzHajvqdP6Lstkpv+np12+sa77B5P8alX9+0x2Ebm2u688ms8BsGxCNsBYB9Zbv3N6AeP6nUcekn+5HjvdfXlV7U7ytCQXZHKR4NuS/ER3v3eD7/nLmayh/k+ZrKd+YyY7evzdUX6Gn89k549HJXlSJjPMH0vytzm6tdTnJrk2yW+sq+8xmVzE+cV1/X43yc4kz8lkJ5U3Z3KhKMDKqcm/bgLA4lTV/51JqD69uz+x7HoARjOTDcBcVdWDk9wpyXsyWR7yY5nMur9UwAa2KiEbgHm7NslDkzwhyYlJPpXkD+MmMsAWZrkIAAAMZgs/AAAYTMgGAIDBhGwAABhMyAYAgMGEbAAAGEzIBgCAwf5/+a2SrVD0CRsAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 864x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Word lengths are integers, so use one bin per length instead of the default\n",
"# 10 equal-width bins, which group neighbouring lengths together unevenly.\n",
"bins = list(range(min(length_ls), max(length_ls) + 2)) if length_ls else 10\n",
"fig, ax = plt.subplots(figsize=(12, 10))\n",
"# align='left' centres every bar on the integer length it counts.\n",
"ax.hist(length_ls, bins=bins, align='left', color='red', alpha=.5)\n",
"ax.set_title('Length distribution of unique words', fontsize=16)\n",
"ax.set_xlabel('word length', fontsize=16)\n",
"ax.set_ylabel('frequency', fontsize=16)\n",
"ax.grid();"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading