datadesk
diff --git a/‎docs/_build/doctrees/documents.doctree‎
1.86 KB b/‎docs/_build/doctrees/documents.doctree‎
1.86 KB
diff --git a/‎docs/_build/doctrees/environment.pickle‎
475 Bytes b/‎docs/_build/doctrees/environment.pickle‎
475 Bytes
diff --git a/‎docs/_build/html/_sources/documents.txt‎
Lines changed: 10 additions & 0 deletions b/‎docs/_build/html/_sources/documents.txt‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/_build/html/documents.html‎
Lines changed: 13 additions & 0 deletions b/‎docs/_build/html/documents.html‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎docs/_build/html/genindex.html‎
Lines changed: 5 additions & 1 deletion b/‎docs/_build/html/genindex.html‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎docs/_build/html/objects.inv‎
30 Bytes b/‎docs/_build/html/objects.inv‎
30 Bytes
diff --git a/‎docs/_build/html/searchindex.js‎
Lines changed: 1 addition & 1 deletion b/‎docs/_build/html/searchindex.js‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/documents.rst‎
Lines changed: 10 additions & 0 deletions b/‎docs/documents.rst‎
Lines changed: 10 additions & 0 deletions
@@ -156,6 +156,16 @@ Metadata
 
     Returns the URL that contains the full text of the document, as extracted from the original PDF by DocumentCloud. 
 
+.. method:: document_obj.get_page_text(page)
+
+    Submit a page number and receive the raw text extracted from it by DocumentCloud.
+
+    >>> obj = client.documents.get('1088501-adventuretime-alta')
+    >>> txt = obj.get_page_text(1)
+    >>> # Let's print just the first line
+    >>> print txt.decode().split("\n")[0]
+    STATE OF CALIFORNIA- HEALTH AND HUMAN SERVICES AGENCY
+
 .. attribute:: document_obj.id
 
     The unique identifier of the document in DocumentCloud's system. Typically this is a string that begins with a number, like ``83251-fbi-file-on-christopher-biggie-s.malls-wallace``
 
@@ -251,6 +251,19 @@ <h2>Metadata<a class="headerlink" href="#metadata" title="Permalink to this head
 <dd><p>Returns the URL that contains the full text of the document, as extracted from the original PDF by DocumentCloud.</p>
 </dd></dl>
 
+<dl class="method">
+<dt id="document_obj.get_page_text">
+<tt class="descclassname">document_obj.</tt><tt class="descname">get_page_text</tt><big>(</big><em>page</em><big>)</big><a class="headerlink" href="#document_obj.get_page_text" title="Permalink to this definition">¶</a></dt>
+<dd><p>Submit a page number and receive the raw text extracted from it by DocumentCloud.</p>
+<div class="highlight-python"><div class="highlight"><pre><span class="gp">&gt;&gt;&gt; </span><span class="n">obj</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">documents</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s">&#39;1088501-adventuretime-alta&#39;</span><span class="p">)</span>
+<span class="gp">&gt;&gt;&gt; </span><span class="n">txt</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">get_page_text</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
+<span class="go"># Let&#39;s print just the first line</span>
+<span class="gp">&gt;&gt;&gt; </span><span class="k">print</span> <span class="n">txt</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s">&quot;</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
+<span class="go">STATE OF CALIFORNIA- HEALTH AND HUMAN SERVICES AGENCY</span>
+</pre></div>
+</div>
+</dd></dl>
+
 <dl class="attribute">
 <dt id="document_obj.id">
 <tt class="descclassname">document_obj.</tt><tt class="descname">id</tt><a class="headerlink" href="#document_obj.id" title="Permalink to this definition">¶</a></dt>
 
@@ -238,10 +238,14 @@ <h2 id="G">G</h2>
   <dt><a href="projects.html#project_obj.get_document">get_document() (project_obj method)</a>
   </dt>
 
+      
+  <dt><a href="projects.html#client.projects.get_or_create_by_title">get_or_create_by_title() (client.projects method)</a>
+  </dt>
+
   </dl></td>
   <td style="width: 33%" valign="top"><dl>
 
-  <dt><a href="projects.html#client.projects.get_or_create_by_title">get_or_create_by_title() (client.projects method)</a>
+  <dt><a href="documents.html#document_obj.get_page_text">get_page_text() (document_obj method)</a>
   </dt>
 
   </dl></td>
 
@@ -156,6 +156,16 @@ Metadata
 
     Returns the URL that contains the full text of the document, as extracted from the original PDF by DocumentCloud. 
 
+.. method:: document_obj.get_page_text(page)
+
+    Submit a page number and receive the raw text extracted from it by DocumentCloud.
+
+    >>> obj = client.documents.get('1088501-adventuretime-alta')
+    >>> txt = obj.get_page_text(1)
+    >>> # Let's print just the first line
+    >>> print txt.decode().split("\n")[0]
+    STATE OF CALIFORNIA- HEALTH AND HUMAN SERVICES AGENCY
+
 .. attribute:: document_obj.id
 
     The unique identifier of the document in DocumentCloud's system. Typically this is a string that begins with a number, like ``83251-fbi-file-on-christopher-biggie-s.malls-wallace``