# -*- coding: utf-8 -*-
import functools
import pandas as pd
import numpy as np
from .process import start_profile
from .process import end_profile
from .simple import add_text_summary_features
from .pos import add_text_pos_features
from .topics import add_text_topics_features
from .profanity import add_text_profanity_features
from .traits import add_text_trait_features
from .rhetoric import add_text_rhetoric_features
from .sentiment import add_text_sentiment_features
from .literacy import add_text_literacy_features
from .emoticons import add_text_emoticon_features
from .comparison import add_comparison_features
from .scarcity import add_scarcity_features
from .embedding import add_text_embedding_features
"""
texturizer.featurize: Core functions to apply a set of features to a data frame.
"""
########################################################################################
def process_df(df, params):
    """
    process_df: Run the configured feature generators over a data frame.

    The summary features are always generated first. Every other feature
    family is applied, in a fixed order, only when its flag in ``params``
    is truthy. Each stage is bracketed by ``start_profile`` / ``end_profile``
    calls that use the stage name as the label.

    Parameters:
        df: the data frame handed to ``add_text_summary_features``.
        params: mapping of settings. Read here: ``"columns"``, one flag per
            optional stage (``"comparison"``, ``"profanity"``, ``"sentiment"``,
            ``"scarcity"``, ``"emoticons"``, ``"embedding"``, ``"topics"``,
            ``"traits"``, ``"rhetoric"``, ``"pos"``, ``"literacy"``) and the
            mode switches ``"normalize_embedding"``, ``"count_matches"`` and
            ``"normalize_topics"``.

    Returns:
        Whatever the last executed feature function returned.
    """

    def _no_mode():
        # Most stages take only (frame, columns).
        return ()

    def _embedding_mode():
        # Optional third positional argument for the embedding stage.
        return ('normalize',) if params["normalize_embedding"] else ()

    def _topics_mode():
        # "normalize_topics" is only consulted when "count_matches" is set.
        if not params["count_matches"]:
            return ()
        return ('normalize',) if params["normalize_topics"] else ('count',)

    # (flag key == profile label, feature function, extra-argument resolver),
    # listed in execution order.
    optional_stages = (
        ("comparison", add_comparison_features, _no_mode),
        ("profanity", add_text_profanity_features, _no_mode),
        ("sentiment", add_text_sentiment_features, _no_mode),
        ("scarcity", add_scarcity_features, _no_mode),
        ("emoticons", add_text_emoticon_features, _no_mode),
        ("embedding", add_text_embedding_features, _embedding_mode),
        ("topics", add_text_topics_features, _topics_mode),
        ("traits", add_text_trait_features, _no_mode),
        ("rhetoric", add_text_rhetoric_features, _no_mode),
        ("pos", add_text_pos_features, _no_mode),
        ("literacy", add_text_literacy_features, _no_mode),
    )

    # The summary stage is unconditional and seeds the running result.
    start_profile("simple")
    features = add_text_summary_features(df, params["columns"])
    end_profile("simple")

    for label, add_features, resolve_mode in optional_stages:
        if not params[label]:
            continue
        start_profile(label)
        # Resolve mode switches before reading "columns" so the key-lookup
        # order stays the same as in the hand-written stanzas.
        mode_args = resolve_mode()
        features = add_features(features, params["columns"], *mode_args)
        end_profile(label)

    return features
########################################################################################
def generate_feature_function(parameters):
    """
    Bind a set of processed command line arguments to ``process_df``.

    The returned callable is a ``functools.partial`` of ``process_df`` with
    ``params`` fixed to ``parameters``. It only needs the data frame as its
    argument, so the same configuration can be applied to several chunks
    of a data frame.
    """
    feature_function = functools.partial(process_df, params=parameters)
    return feature_function