|
6 | 6 | "source": [ |
7 | 7 | "The CLTK has a distributed infrastructure that lets you download official CLTK texts or other corpora shared by others. For full docs, see <http://docs.cltk.org/en/latest/importing_corpora.html>.\n", |
8 | 8 | "\n", |
9 | | - "To get started, from the Terminal, open a new Jupyter notebook from within your `~/cltk` directory (see notebook 1 for instructions): `jupyter notebook`. Then go to <http://localhost:8888>." |
| 9 | + "To get started, from the Terminal, open a new Jupyter notebook from within your `~/cltk` directory (see notebook 1 \"CLTK Setup\" for instructions): `jupyter notebook`. Then go to <http://localhost:8888>." |
10 | 10 | ] |
11 | 11 | }, |
12 | 12 | { |
|
20 | 20 | }, |
21 | 21 | { |
22 | 22 | "cell_type": "code", |
23 | | - "execution_count": 1, |
24 | | - "metadata": { |
25 | | - "collapsed": true |
26 | | - }, |
| 23 | + "execution_count": 2, |
| 24 | + "metadata": {}, |
27 | 25 | "outputs": [], |
28 | 26 | "source": [ |
29 | | - "# this is the import of the right part of the CLTK library\n", |
| 27 | + "# Import the part of the CLTK library that downloads corpora (the CorpusImporter class)\n", |
| 28 | + "\n", |
30 | 29 | "from cltk.corpus.utils.importer import CorpusImporter" |
31 | 30 | ] |
32 | 31 | }, |
33 | 32 | { |
34 | 33 | "cell_type": "code", |
35 | | - "execution_count": 2, |
36 | | - "metadata": { |
37 | | - "collapsed": true |
38 | | - }, |
| 34 | + "execution_count": 3, |
| 35 | + "metadata": {}, |
39 | 36 | "outputs": [], |
40 | 37 | "source": [ |
41 | 38 | "# See https://github.com/cltk for all official corpora\n", |
42 | 39 | "\n", |
43 | 40 | "my_latin_downloader = CorpusImporter('latin')\n", |
44 | 41 | "\n", |
45 | | - "# 'my_latin_downloader' is the variable by which we now call the CorpusImporter" |
| 42 | + "# Now 'my_latin_downloader' is the variable by which we call the CorpusImporter" |
46 | 43 | ] |
47 | 44 | }, |
48 | 45 | { |
49 | 46 | "cell_type": "code", |
50 | | - "execution_count": 3, |
| 47 | + "execution_count": 4, |
51 | 48 | "metadata": {}, |
52 | 49 | "outputs": [ |
53 | 50 | { |
|
70 | 67 | " 'latin_text_poeti_ditalia']" |
71 | 68 | ] |
72 | 69 | }, |
73 | | - "execution_count": 3, |
| 70 | + "execution_count": 4, |
74 | 71 | "metadata": {}, |
75 | 72 | "output_type": "execute_result" |
76 | 73 | } |
|
88 | 85 | }, |
89 | 86 | { |
90 | 87 | "cell_type": "code", |
91 | | - "execution_count": 4, |
92 | | - "metadata": { |
93 | | - "collapsed": true |
94 | | - }, |
| 88 | + "execution_count": 5, |
| 89 | + "metadata": {}, |
95 | 90 | "outputs": [], |
96 | 91 | "source": [ |
97 | 92 | "my_latin_downloader.import_corpus('latin_text_latin_library')\n", |
|
335 | 330 | "cell_type": "markdown", |
336 | 331 | "metadata": {}, |
337 | 332 | "source": [ |
338 | | - "# Convert TEI XML corpus\n", |
| 333 | + "# Convert TEI XML texts\n", |
339 | 334 | "\n", |
340 | | - "Here we'll convert the 1K Years' Greek corpus from TEI XML to plaintext" |
| 335 | + "Here we'll convert the First Thousand Years of Greek (First1KGreek) corpus from TEI XML to plain text." |
341 | 336 | ] |
342 | 337 | }, |
343 | 338 | { |
|
358 | 353 | "outputs": [], |
359 | 354 | "source": [ |
360 | 355 | "#! If you get the following error: 'Install `bs4` and `lxml` to parse these TEI files.'\n", |
361 | | - "# then run: `pip install bs4 lxml`\n", |
| 356 | + "# then run: `pip install bs4 lxml`.\n", |
362 | 357 | "\n", |
363 | 358 | "onekgreek_tei_xml_to_text()" |
364 | 359 | ] |
|
377 | 372 | } |
378 | 373 | ], |
379 | 374 | "source": [ |
380 | | - "# count the converted plaintext files:\n", |
| 375 | + "# Count the converted plain-text files (the result also includes the 'total' header line printed by `ls -l`)\n", |
| 376 | + "\n", |
381 | 377 | "!ls -l ~/cltk_data/greek/text/greek_text_first1kgreek_plaintext/ | wc -l" |
382 | 378 | ] |
383 | 379 | }, |
384 | 380 | { |
385 | 381 | "cell_type": "markdown", |
386 | 382 | "metadata": {}, |
387 | 383 | "source": [ |
388 | | - "# Import local corpus" |
| 384 | + "# Import local corpora" |
389 | 385 | ] |
390 | 386 | }, |
391 | 387 | { |
|
438 | 434 | } |
439 | 435 | ], |
440 | 436 | "source": [ |
441 | | - "!ls -l /home/kyle/cltk_data/originals/" |
| 437 | + "!ls -l /home/kyle/cltk_data/originals/ # Adapt this path to your own file system" |
442 | 438 | ] |
443 | 439 | } |
444 | 440 | ], |
|
458 | 454 | "name": "python", |
459 | 455 | "nbconvert_exporter": "python", |
460 | 456 | "pygments_lexer": "ipython3", |
461 | | - "version": "3.6.1" |
| 457 | + "version": "3.6.4" |
462 | 458 | } |
463 | 459 | }, |
464 | 460 | "nbformat": 4, |
|
0 commit comments