# Source code for texturizer.rhetoric

# -*- coding: utf-8 -*-
import pkg_resources
import pandas as pd 
import numpy as np
import math
import os
import re

from .process import load_word_pattern
 
"""
    texturizer.rhetoric: Features counting matches of rhetorical device
    word patterns (cliches, jargon, authority) in text columns.

"""

########################################################################################

# Patterns obtained from load_word_pattern, one per data file, loaded at
# import time. Each is used below as the regex argument to Series.str.count.
cliches_pat, jargon_pat, authority_pat = (
    load_word_pattern(word_file)
    for word_file in ('cliches.dat', 'jargon.dat', 'authority.dat')
)

########################################################################################
def add_text_rhetoric_features(df, columns):
    """
        Add rhetoric count features for each of the named text columns.

        The input dataframe is copied first, so the caller's dataframe is
        not modified. For every name in `columns`, add_rhetoric_counts is
        applied to the copy, which adds the `<col>_cliches`, `<col>_jargon`
        and `<col>_authority` columns.

        :param df: pandas dataframe holding the text columns.
        :param columns: iterable of column names to process.
        :return: a copy of `df` with the rhetoric feature columns added.
    """
    rez = df.copy()
    for col in columns:
        rez = add_rhetoric_counts(rez, col)
    return rez

########################################################################################
def add_rhetoric_counts(df, col):
    """
        Count rhetoric pattern matches in a single text column.

        Adds three columns to `df` in place: `<col>_cliches`, `<col>_jargon`
        and `<col>_authority`. Each holds the number of case-insensitive
        matches of the corresponding module-level pattern in `df[col]`.

        :param df: pandas dataframe; modified in place.
        :param col: name of the text column to scan.
        :return: the same dataframe object, with the count columns added.
    """
    # Suffix of the new column paired with the pattern it counts; the order
    # fixes the order in which the columns are appended.
    feature_patterns = (
        ('_cliches', cliches_pat),
        ('_jargon', jargon_pat),
        ('_authority', authority_pat),
    )
    for suffix, pattern in feature_patterns:
        df[col + suffix] = df[col].str.count(pattern, flags=re.IGNORECASE)
    return df