Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ExplainToMe/forms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from flask_wtf import FlaskForm as Form
from wtforms.fields.html5 import IntegerField, URLField
from wtforms.fields import SelectField
from wtforms.validators import NumberRange, InputRequired
from wtforms.validators import URL, Required

Expand All @@ -16,5 +17,12 @@ class LinkForm(Form):
'max_sent',
validators=[Required(), InputRequired(), NumberRange(min=0, max=100)],
id='max_sent',
default=10,
default=10
)
language = SelectField(
'language',
validators=[Required(), InputRequired()],
id='language',
choices=[('english','English'), ('chinese', 'Chinese'), ('czech', 'Czech'), ('french', 'French'), ('german', 'German'), ('japanese', 'Japanese'), ('portuguese', 'Portuguese'), ('slovak', 'Slovak'), ('spanish', 'Spanish')],
default='english'
)
103 changes: 41 additions & 62 deletions ExplainToMe/templates/base.html
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
{% extends "bootstrap/base.html" %}

{% import "bootstrap/fixes.html" as fixes %}
{% import "bootstrap/google.html" as google %}
{% extends "bootstrap/base.html" %} {% import "bootstrap/fixes.html" as fixes %} {% import "bootstrap/google.html" as google
%}

<!DOCTYPE html>
<html lang="en">
<head>
{% block head %}

{% block meta %}
{% include "includes/meta.html" %}
{% endblock %}
<head>
{% block head %} {% block meta %} {% include "includes/meta.html" %} {% endblock %}


<title>
Expand All @@ -19,73 +14,57 @@


{% block favicon %}
<link type="image/x-icon" rel="icon" href="/favicon.ico" />
{% endblock favicon %}
<link type="image/x-icon" rel="icon" href="/favicon.ico" /> {% endblock favicon %} {% block styles %}
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" />

{% block styles %}
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" />
<link href="https://bootswatch.com/readable/bootstrap.min.css" rel="stylesheet" />
<link href="{{ url_for('static', filename='css/style.css') }}" rel=stylesheet />
<link href="{{ url_for('static', filename='css/animation.css') }}" rel=stylesheet />
{% endblock styles %}

{{ fixes.ie8() }}
{{ google.uanalytics('UA-51412306-6') }}

{% endblock %}
<link href="https://bootswatch.com/3/readable/bootstrap.min.css" rel="stylesheet" />
<link href="{{ url_for('static', filename='css/style.css') }}" rel=stylesheet />
<link href="{{ url_for('static', filename='css/animation.css') }}" rel=stylesheet /> {% endblock styles %} {{ fixes.ie8() }} {{ google.uanalytics('UA-51412306-6') }} {% endblock %}
</head>

<body>
{% block body %}

{% block navbar %}
{% include 'includes/nav.html' %}
{% endblock %}

<a id="run" name="run"></a>
<div class="intro-header">
<div class="container">
<div class="row">
{% block run %}{% endblock run %}
{% block body %} {% block navbar %} {% include 'includes/nav.html' %} {% endblock %}

<a id="run" name="run"></a>
<div class="intro-header">
<div class="container">
<div class="row">
{% block run %}{% endblock run %}
</div>
</div>
</div>
</div>

<a id="summary" name="summary"></a>
<div class="content-section-a">
<div class="container">
<div class="row">
{% block summary %}{% endblock summary %}
<a id="summary" name="summary"></a>
<div class="content-section-a">
<div class="container">
<div class="row">
{% block summary %}{% endblock summary %}
</div>
</div>
</div>
</div>

<a id="about" name="about"></a>
<div class="banner">
<div class="container">
<div class="row">
{% block about %}{% endblock about %}
<a id="about" name="about"></a>
<div class="banner">
<div class="container">
<div class="row">
{% block about %}{% endblock about %}
</div>
</div>
</div>
</div>


<a id="contact" name="contact">
<div class="content-section-b">
<div class="container">
<div class="row">
{% block contact %}{% endblock contact %}
<a id="contact" name="contact">
<div class="content-section-b">
<div class="container">
<div class="row">
{% block contact %}{% endblock contact %}
</div>
</div>
</div>
</div>
</div>

{% include 'includes/footer.html' %}

{% block scripts %}
{{ super() }}
<script type="text/javascript" src="{{ url_for('static', filename='js/site.js') }}"></script>
{% endblock scripts %}

{% endblock body %}
{% include 'includes/footer.html' %} {% block scripts %} {{ super() }}
<script type="text/javascript" src="{{ url_for('static', filename='js/site.js') }}"></script>
{% endblock scripts %} {% endblock body %}
</body>
</html>

</html>
1 change: 1 addition & 0 deletions ExplainToMe/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ <h3>Automatic Web Article Summarizer</h3>
{{ form.hidden_tag() }}
{{ render_field(form.url, required=true) }}
{{ render_field(form.max_sent, required=true) }}
{{ render_field(form.language, required=true) }}
<div class="form-group">
<button id="send" type="submit" class="btn btn-success">
<span class="glyphicon glyphicon-send"></span> Send
Expand Down
17 changes: 14 additions & 3 deletions ExplainToMe/textrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from sumy.summarizers.text_rank import TextRankSummarizer as Summarizer
from sumy.utils import cached_property, get_stop_words
from itertools import chain
from sumy.nlp.tokenizers import Tokenizer

from iso639 import languages

if six.PY2:
str = six.text_type
Expand Down Expand Up @@ -114,7 +117,11 @@ def run_summarizer(parser, sentences, language='english'):
for sentence in summarizer(parser.document, sentences)]


def get_parser(url, tokenizer):
def get_lang(name):
    """Return the ISO 639-1 (two-letter) code for a language name.

    The name is title-cased before being looked up in the ``iso639``
    ``languages.inverted`` table, so ``'english'`` and ``'English'`` are
    treated the same.

    :param name: language name, e.g. ``'english'``.
    :returns: the ``part1`` attribute of the matching language entry
        (presumably an empty string for languages that have no
        two-letter code -- confirm against the iso639 package).
    :raises ValueError: if no language with that name is known.
    """
    entry = languages.inverted.get(name.title())
    if entry is None:
        # Without this guard the lookup miss surfaces as an opaque
        # "'NoneType' object has no attribute 'part1'" AttributeError.
        raise ValueError('Unknown language name: {!r}'.format(name))
    return entry.part1

def get_parser(url, language):
tokenizer = Tokenizer(language)
useragent = ' '.join([
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6)",
"AppleWebKit/537.36 (KHTML, like Gecko)",
Expand All @@ -127,13 +134,17 @@ def get_parser(url, tokenizer):
extra_headers['Referer'] = r'https://t.co/T1323aaaa'
# Scrape Web Page With HTMLParser and Goose and select the best scrape
html_parser = HtmlParser.from_url(url, tokenizer, **extra_headers)
article = Goose({'browser_user_agent': useragent})
article = Goose({'browser_user_agent': useragent, 'target_language': get_lang(language)})

# Goose raises IndexError when requesting unfamiliar sites.
try:
extract = article.extract(url=url)
except:
extract = article.extract(raw_html=requests.get(url).text)
req = requests.get(url)
print(req.apparent_encoding)
extract = article.extract(
raw_html=req.content.decode(encoding=req.apparent_encoding)
)

goose_parser = PlaintextParser(extract, tokenizer)

Expand Down
6 changes: 3 additions & 3 deletions ExplainToMe/views/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
def index():
form = LinkForm()
if form.validate_on_submit():
url, max_sent = request.form.get('url'), request.form.get('max_sent')
language = 'english'
parser, meta = get_parser(url, Tokenizer(language))
url, max_sent, language = request.form.get('url'), request.form.get('max_sent'), request.form.get('language')
#language = 'english'
parser, meta = get_parser(url, language)
new_meta = meta.pop('meta', {})
meta.update(new_meta)
session['summary'] = run_summarizer(parser, max_sent, language=language)
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ chainmap==1.0.2
ipython>=5.3.0,<6.0.0
nltk==3.2.2
python-dateutil==2.6.0
sumy==0.6.0
sumy==0.7.0
requests==2.17.3
Flask-AppConfig==0.11.1
Flask-Debug==0.4.3
Flask-SSLify==0.1.5
iso-639==0.4.5
gunicorn
ipdb
gevent
Expand Down