fabricatio_typst.models.article_rag

A module containing the article RAG models.

Classes

ArticleChunk

The chunk of an article.

ArticleEssenceStorable

ArticleEssence with LanceDB storage capability.

CitationManager

Citation manager.

Module Contents

class fabricatio_typst.models.article_rag.ArticleChunk

Bases: fabricatio_lancedb.models.lancedb.LancedbDocumentModel[fabricatio_lancedb.rust.StoreDocument, fabricatio_lancedb.rust.SearchedDocument]

The chunk of an article.

etc_word: ClassVar[str] = '等'
and_word: ClassVar[str] = '与'
head_split: ClassVar[List[str]] = ['引 言', '引言', '绪 论', '绪论', '前言', 'INTRODUCTION', 'Introduction']
tail_split: ClassVar[List[str]] = ['参 献', '参      献', '参考文献', 'REFERENCES', 'References', 'Bibliography', 'Reference']
content: str

The segment of the article

year: int

The year of the article

authors: List[str] = None

The authors of the article

article_title: str

The title of the article

bibtex_cite_key: str

The bibtex cite key of the article

property reference_header: str

Get the reference header.

property cite_number: int

Get the cite number.

classmethod from_file[P: str | pathlib.Path](path: P | List[P], bib_mgr: fabricatio_typst.rust.BibManager, **kwargs: Unpack[fabricatio_typst.models.kwargs_types.ChunkKwargs]) List[Self]

Load the article chunks from the file.

classmethod strip(string: str) str

Strip the head and tail of the string.

as_typst_cite() str

Return the chunk as a Typst citation string.

static purge_numeric_citation(string: str) str

Purge numeric citation.

property auther_lastnames: List[str]

Get the last names of the authors.

as_auther_seq() str

Get the author sequence.

update_cite_number(cite_number: int) Self

Update the cite number.

prepare_insertion(vector: Sequence[float]) fabricatio_lancedb.rust.StoreDocument

Serialize typed fields into metadata for LanceDB storage.

classmethod from_raw(raw: fabricatio_lancedb.rust.SearchedDocument) Self

Deserialize from a LanceDB search result.

class fabricatio_typst.models.article_rag.ArticleEssenceStorable

Bases: fabricatio_typst.models.article_essence.ArticleEssence, fabricatio_lancedb.models.lancedb.LancedbDocumentModel[fabricatio_lancedb.rust.StoreDocument, fabricatio_lancedb.rust.SearchedDocument]

ArticleEssence with LanceDB storage capability.

class fabricatio_typst.models.article_rag.CitationManager

Bases: fabricatio_capabilities.models.generic.AsPrompt

Citation manager.

article_chunks: List[ArticleChunk] = []

Article chunks.

pat: str = '(\\[\\[([\\d\\s,-]*)]])'

Regex pattern to match citations.

sep: str = ','

Separator for citation numbers.

abbr_sep: str = '-'

Separator for abbreviated citation numbers.

update_chunks(article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) Self

Update article chunks.

empty() Self

Empty the article chunks.

add_chunks(article_chunks: List[ArticleChunk], set_cite_number: bool = True, dedup: bool = True) Self

Add article chunks.

set_cite_number_all() Self

Set citation numbers for all article chunks.

apply(string: str) str

Apply citation replacements to the input string.

citation_count(string: str) int

Get the citation count in the string.

citation_coverage(string: str) float

Get the citation coverage in the string.

decode_expr(string: str) List[int]

Decode citation expression into a list of integers.

convert_to_numeric_notations(string: str) List[str]

Convert citation string into numeric notations.

deduplicate_citation(citation_seq: List[int]) List[int]

Deduplicate citation sequence.

unpack_cite_seq(citation_seq: List[int]) str

Unpack citation sequence into a string.

get_dedup_key_set() set[str]

Return the set of bibtex_cite_keys already held, for client-side dedup.