Source code for tosixinch.process.inspect_sample


"""Sample functions to use in ``inspect`` action."""

import json
import logging
import re
import urllib.request

logger = logging.getLogger(__name__)


[docs]def get_links(doc, match=''):
    """Print <a href> links, if regex string ``match`` matches.

    usage example:

    .. code-block:: none

        # print jpg files
        inspect=    get_links?jpg$
    """
    for text in doc.xpath('//a/@href'):
        m = re.search(match, text)
        if m:
            print(text)


[docs]def hackernews_topstories(doc):
    """Print hackernews top stories and some data, all commented out.

    Querying https://github.com/HackerNews/API.
    (So it's not using ``doc`` argumant).

    2022/04/30: quite slow (the API server itself is that way, I guess)

    usage example:

    .. code-block:: none

        # only when input is exactly the site home, no glob ('*')
        [hackernews_home]
        match=      https://news.ycombinator.com
        inspect=    hackernews_topstories
    """
    def getjson(url):
        with urllib.request.urlopen(url) as f:
            data = f.read()
        return json.loads(data.decode(encoding='utf-8'))

    url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
    topstories = getjson(url)[:50]

    title = '## TOSIXINCH: HACKERNEWS TOPSTORIES ##'
    heading = '## comments points ##'
    stories = [title, heading]

    for topstory in topstories:
        urlfmt = 'https://hacker-news.firebaseio.com/v0/item/%s.json'
        url = urlfmt % str(topstory)
        st = getjson(url)
        hnurl = 'https://news.ycombinator.com/item?id=%s' % str(topstory)
        if st.get('descendants', 0) < 5 or st['score'] < 5:
            continue
        story = '# %4s %4s\n# %s\n# %s\n# %s\n' % (
                st.get('descendants', '0'), st['score'], st['title'],
                st.get('url', ''), hnurl)
        stories.append(story)

    for s in stories:
        print(s)
Source code for tosixinch.process.inspect_sample

tosixinch

Navigation

Related Topics