You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
1.0 KiB
44 lines
1.0 KiB
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import json
|
|
import sys
|
|
from textblob import TextBlob
|
|
from newspaper import Article
|
|
import re
|
|
|
|
|
|
def main(params):
    """Summarise the article at ``params["url"]`` and return it as sentences.

    The article is fetched with ``newspaper.Article`` (download, parse, nlp)
    and its ``summary`` text is cleaned up and cut into sentences.

    Args:
        params: Mapping with a ``"url"`` key naming the article to fetch.

    Returns:
        A dict with two keys:
        ``"activation_id"`` - ``str()`` of the ``__OW_ACTIVATION_ID``
        environment variable (the string ``"None"`` when it is unset), and
        ``"processed_data"`` - the list of non-empty, stripped sentences.

    Raises:
        KeyError: If ``params`` has no ``"url"`` entry.

    The same dict is also printed to stdout as a JSON document.
    """
    activation_id = os.environ.get('__OW_ACTIVATION_ID')

    article = Article(params["url"])
    article.download()
    article.parse()
    article.nlp()

    # Flatten the summary onto one line, then drop bracketed numeric
    # markers such as "[12]".
    summary = re.sub(r'\n', ' ', article.summary)
    summary = re.sub(r'\[\d+\]', '', summary)

    # Cut on every literal period, trim each piece and discard the empties.
    # NOTE: this also cuts inside decimals and abbreviations ("3.5", "U.S.").
    pieces = re.split(r'\.', summary)
    sentences = list(filter(None, map(str.strip, pieces)))

    result = {
        "activation_id": str(activation_id),
        "processed_data": sentences,
    }
    print(json.dumps(result))
    return result
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point. The action parameters are passed as a single
    # JSON document in the first argument, e.g. '{"url": "https://..."}'.
    # (Previously this called main(params) with `params` never defined, which
    # raised NameError on every script run.)
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " '<JSON params with a \"url\" key>'")
    main(json.loads(sys.argv[1]))
|