| | |
| | from abc import ABC, abstractmethod |
| | from dataclasses import dataclass |
| | from collections import namedtuple |
| | from typing import List, Optional |
| |
|
# Record type for one summarized article. Field order is part of the public
# interface (callers may construct it positionally), so it must not change.
Summary = namedtuple(
    'Summary',
    [
        'source',
        'cluster_list',
        'link_ext',
        'hed',
        'dek',
        'date',
        'authors',
        'original_length',
        'summary_text',
        'summary_length',
        'chunk_time',
        'query_time',
        'mean_query_time',
        'summary_time',
    ],
)
# NOTE(review): earlier documentation listed a `cluster_num` entry ("number of
# clusters the source article appears in"), but the tuple has no such field.
# It is omitted below; add the field to the list above if it is really needed.
Summary.__doc__ = """
Summary: a namedtuple for storing summaries and relevant metadata.

• source: A Source object for the source of the summarized document.
• cluster_list: A list of the NER entities detected in this article's hed (headline).
• link_ext: The link extension of the article (on the base url, source's source_url).
• hed, dek: Headline and subheader. These are standard industry terms.
  dek is None if not applicable.
• date: Date of publication/update listed in article.
• authors: List of authors, currently a string containing the byline.
• original_length: Length of the original article.
• summary_text: List of summarized chunks.
• summary_length: Length of summary text.
• chunk_time, query_time, mean_query_time, summary_time: Statistics fields
  (presumably timings for the chunking, querying and summarizing steps;
  exact meaning and units are set by the code that builds the Summary).
"""
| |
|
@dataclass
class Source(ABC):
    """Abstract base class describing a source of documents.

    Concrete sources subclass this and implement both retrieval methods;
    the class cannot be instantiated until they are provided.
    """

    # Human-readable name of the source.
    source_name: Optional[str] = ""
    # Base URL of the source.
    source_url: Optional[str] = ""

    # Checkpoint identifiers for this source (presumably the summarization
    # and NER models to load; confirm against the code that consumes them).
    source_summarization_checkpoint: Optional[str] = ""
    source_ner_checkpoint: Optional[str] = ""

    # NOTE(review): `namedtuple` is a factory function, not a type; the
    # annotations below are kept as-is so the interface stays unchanged.
    @abstractmethod
    def retrieve_cluster_data(self) -> List[namedtuple]:
        """Retrieve the data used to create clusters.

        User must implement this source-dependent method.
        This gets called when clustering is performed.

        Returns:
            A list of namedtuple records.
        """

    @abstractmethod
    def retrieve_article(self) -> List[namedtuple]:
        """Retrieve texts for summarization.

        User must implement this source-dependent method.
        This gets called once topics for digestion have been selected.

        Returns:
            A list of namedtuple records.
        """
| |
|