Bizi Mart - Tobacco Product Reorder Point List (Version 1)¶

Created by: Ken Vellian¶

September 2024¶

In [1]:
# Numerical Operations and Data Manipulation 
import pandas as pd
import numpy as np
import numpy.linalg as la
from collections import Counter
import math
import random
 
# Linear Algebra and Matrix Operations
from scipy.sparse import csr_matrix
from scipy.linalg import svd
from scipy.sparse.linalg import svds

# Data Preprocessing and Machine Learning 
from sklearn import datasets
from sklearn import metrics
from sklearn.metrics import silhouette_samples, confusion_matrix, classification_report, accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, completeness_score, homogeneity_score
from sklearn.feature_extraction import DictVectorizer, text
from sklearn import preprocessing, model_selection, decomposition, feature_selection
from sklearn.model_selection import cross_val_score, train_test_split, KFold, GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, SGDRegressor
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

# Plotting and Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
import graphviz
from IPython.display import Image

# Time-Related Functions
import time


# Import statements for PDF files
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Import statements for Excel files
from openpyxl import Workbook
from openpyxl.styles import PatternFill
from openpyxl.utils.dataframe import dataframe_to_rows

# Miscellaneous
from operator import itemgetter, attrgetter
import importlib
import re

1. Loading and Cleaning dataset¶

In [2]:
# Load the CSV file
file_path = '/Users/kvellian/My Drive/Bizi Projects/Tobacco_Inv_8.28.24_9.11.24.csv'
df = pd.read_csv(file_path)
In [3]:
df.head()
Out[3]:
BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/11/2024 Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 047995855079 American Spirit Black $0.00 $12.87 0.00 1 7 1.1
0 BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/1... Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 4.799586e+10 American Spirit Blue $10.93 $12.87 15.09 24.0 18.0 1
1 BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/1... Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 4.799586e+10 American Spirit Gold $11.07 $13.46 17.76 1.0 14.0 1
2 BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/1... Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 7.161020e+10 BACKWOOD Smooth (1) $0.00 $1.69 0.00 2.0 16.0 1
3 BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/1... Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 7.161020e+10 BACKWOOD Smooth (5 pk) $0.00 $7.99 0.00 2.0 65.0 1
4 BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/1... Item # Item Name Cost Price Profit % Sold On Hand Order ... Items Count: 107 7.161030e+10 BACKWOOD SWEET AROMATIC (1) $0.00 $1.59 0.00 20.0 8.0 1

5 rows × 23 columns

In [4]:
df.info
Out[4]:
<bound method DataFrame.info of      BIZI MART Vendor Detail Report\nDate: 08/28/2024 to 09/11/2024  Item #  \
0    BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
1    BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
2    BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
3    BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
4    BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
..         ...                                                ...       ...   
101  BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
102  BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
103  BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
104  BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   
105  BIZI MART  Vendor Detail Report\nDate: 08/28/2024 to 09/1...    Item #   

     Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...  \
0    Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
1    Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
2    Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
3    Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
4    Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
..         ...   ...    ...       ...   ...      ...    ...  ...   
101  Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
102  Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
103  Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
104  Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   
105  Item Name  Cost  Price  Profit %  Sold  On Hand  Order  ...   

     Items Count:  107  047995855079             American Spirit Black  \
0    Items Count:  107  4.799586e+10              American Spirit Blue   
1    Items Count:  107  4.799586e+10              American Spirit Gold   
2    Items Count:  107  7.161020e+10               BACKWOOD Smooth (1)   
3    Items Count:  107  7.161020e+10            BACKWOOD Smooth (5 pk)   
4    Items Count:  107  7.161030e+10       BACKWOOD SWEET AROMATIC (1)   
..            ...  ...           ...                               ...   
101  Items Count:  107  8.667410e+05                      ZIG ZAG KING   
102  Items Count:  107  8.661490e+05            ZIG ZAG WHITE  (32 pk)   
103  Items Count:  107  7.847621e+11  Zig-Zag Wraps Straight-Up  (2pk)   
104  Items Count:  107  7.847621e+11  Zig-Zag Wraps Straight-Up (4 pk)   
105  Items Count:  107  6.092499e+11                     ZYN Chill 6mg   

      $0.00  $12.87   0.00     1     7  1.1  
0    $10.93  $12.87  15.09  24.0  18.0    1  
1    $11.07  $13.46  17.76   1.0  14.0    1  
2     $0.00   $1.69   0.00   2.0  16.0    1  
3     $0.00   $7.99   0.00   2.0  65.0    1  
4     $0.00   $1.59   0.00  20.0   8.0    1  
..      ...     ...    ...   ...   ...  ...  
101   $0.00   $3.29   0.00   1.0  24.0    1  
102   $0.00   $2.69   0.00   2.0  23.0    1  
103   $0.79   $1.49  46.85   8.0  49.0    1  
104   $1.00   $1.69  40.89  20.0  50.0    1  
105   $5.09   $7.59  32.89   8.0   8.0    1  

[106 rows x 23 columns]>
In [5]:
df.dtypes
Out[5]:
BIZI MART                                                object
Vendor Detail Report\nDate: 08/28/2024 to 09/11/2024     object
Item #                                                   object
Item Name                                                object
Cost                                                     object
Price                                                    object
Profit %                                                 object
Sold                                                     object
On Hand                                                  object
Order                                                    object
Vendor:                                                  object
TOBACCO {PITCO}                                          object
Unnamed: 12                                             float64
Items Count:                                             object
107                                                       int64
047995855079                                            float64
American Spirit Black                                    object
$0.00                                                    object
$12.87                                                   object
0.00                                                    float64
1                                                       float64
7                                                       float64
1.1                                                       int64
dtype: object
In [6]:
# Drop the first 15 columns and the 23rd column
df_cleaned = df.drop(df.columns[0:15], axis=1)  # Drop columns 1 to 15
df_cleaned = df_cleaned.drop(df.columns[-1], axis=1)  # Drop the 23rd column
In [7]:
# Drop any rows with blank entries (NAs)
df_cleaned = df_cleaned.dropna()
In [8]:
# Reconstruct the column names for columns 16 - 22
df_cleaned.columns = ['Item #', 'Item Name', 'Cost', 'Price', 'Profit %', 'Sold', 'On Hand']
In [9]:
df_cleaned.head()
Out[9]:
Item # Item Name Cost Price Profit % Sold On Hand
0 4.799586e+10 American Spirit Blue $10.93 $12.87 15.09 24.0 18.0
1 4.799586e+10 American Spirit Gold $11.07 $13.46 17.76 1.0 14.0
2 7.161020e+10 BACKWOOD Smooth (1) $0.00 $1.69 0.00 2.0 16.0
3 7.161020e+10 BACKWOOD Smooth (5 pk) $0.00 $7.99 0.00 2.0 65.0
4 7.161030e+10 BACKWOOD SWEET AROMATIC (1) $0.00 $1.59 0.00 20.0 8.0
In [10]:
df_cleaned.info
Out[10]:
<bound method DataFrame.info of            Item #                         Item Name    Cost   Price  Profit %  \
0    4.799586e+10              American Spirit Blue  $10.93  $12.87     15.09   
1    4.799586e+10              American Spirit Gold  $11.07  $13.46     17.76   
2    7.161020e+10               BACKWOOD Smooth (1)   $0.00   $1.69      0.00   
3    7.161020e+10            BACKWOOD Smooth (5 pk)   $0.00   $7.99      0.00   
4    7.161030e+10       BACKWOOD SWEET AROMATIC (1)   $0.00   $1.59      0.00   
..            ...                               ...     ...     ...       ...   
101  8.667410e+05                      ZIG ZAG KING   $0.00   $3.29      0.00   
102  8.661490e+05            ZIG ZAG WHITE  (32 pk)   $0.00   $2.69      0.00   
103  7.847621e+11  Zig-Zag Wraps Straight-Up  (2pk)   $0.79   $1.49     46.85   
104  7.847621e+11  Zig-Zag Wraps Straight-Up (4 pk)   $1.00   $1.69     40.89   
105  6.092499e+11                     ZYN Chill 6mg   $5.09   $7.59     32.89   

     Sold  On Hand  
0    24.0     18.0  
1     1.0     14.0  
2     2.0     16.0  
3     2.0     65.0  
4    20.0      8.0  
..    ...      ...  
101   1.0     24.0  
102   2.0     23.0  
103   8.0     49.0  
104  20.0     50.0  
105   8.0      8.0  

[104 rows x 7 columns]>
In [11]:
df_cleaned.dtypes
Out[11]:
Item #       float64
Item Name     object
Cost          object
Price         object
Profit %     float64
Sold         float64
On Hand      float64
dtype: object

2. Converting "$" columns to float¶

In [12]:
# Convert 'Cost' and 'Price' to numeric values
df_cleaned['Cost'] = df_cleaned['Cost'].replace('[\$,]', '', regex=True).astype(float)
df_cleaned['Price'] = df_cleaned['Price'].replace('[\$,]', '', regex=True).astype(float)
In [13]:
df_cleaned.head()
Out[13]:
Item # Item Name Cost Price Profit % Sold On Hand
0 4.799586e+10 American Spirit Blue 10.93 12.87 15.09 24.0 18.0
1 4.799586e+10 American Spirit Gold 11.07 13.46 17.76 1.0 14.0
2 7.161020e+10 BACKWOOD Smooth (1) 0.00 1.69 0.00 2.0 16.0
3 7.161020e+10 BACKWOOD Smooth (5 pk) 0.00 7.99 0.00 2.0 65.0
4 7.161030e+10 BACKWOOD SWEET AROMATIC (1) 0.00 1.59 0.00 20.0 8.0

3. Create "Brand" column and organize products by brand¶

In [14]:
# Step 5: Dictionary for regex patterns for each brand
brand_patterns = {
    'American Spirit': r'\bamerican\s*spirit\b',
    'Backwoods': r'\bbackwood[s]*\b',
    'Black & Mild': r'\bblack[\s&\-]*mild\b',
    'BLK Smooth': r'\bblk\b',
    'Bugler': r'\bbugler\b',
    'Camel': r'\bcamel\b|\bkamel\b',
    'Capri': r'\bcapri\b',
    'Copenhagen': r'\bcopenhagen\b',
    'Crowns': r'\bcrowns\b',
    'Crownz': r'\bcrownz\b',
    'Dutch': r'\bdutch\b',  
    'Fiesta': r'\bfiesta\b',
    'FLM': r'\bflm\b',
    'Fronto Leaf': r'\bfronto\b',
    'Good Time': r'\bgood[\s\-]*times?\b',  # Combines "Good Times" and "Good Time"
    'Grabba': r'\bgrabba\b',
    'HIGH HEMP': r'\bhigh[\s\-]*hemp\b',
    'Job': r'\bjob\b',
    'Kool': r'\bkool\b',  
    'Longhorn': r'\blonghorn\b',
    'Looseleaf': r'\blooseleaf\b',
    'Lucky Strike': r'\blucky\s*strike\b',
    'Marlboro': r'\bmarlboro\b',
    'Maverick': r'\bmaverick\b',
    'Newport': r'\bnewport\b',
    'Optimo': r'\boptimo\b',
    'Pall Mall': r'\bpall\s*mall\b',
    'Parliament': r'\bparliament\b',
    'Pom Pom': r'\bpom(\s*pom)?\b',  # Combines "Pom" and "Pom Pom"
    'RAW': r'\braw\b',
    'Red River': r'\bred\s*river\b',
    'Rogue': r'\brogue\b',
    'Seneca': r'\bseneca\b',
    'Show': r'\bshow\b',
    'Swisher': r'\bswisher\b',
    'Winston': r'\bwinston\b',
    'XXL': r'\bxxl\b',
    'Zemis': r'\bzemis\b',
    'Zig-Zag': r'\bzig[\s\-]*zag\b',    
    'ZYN': r'\bzyn\b'
}
In [15]:
# Step 6: Function to extract brand using regex
def extract_brand_regex(item_name, patterns):
    for brand, pattern in patterns.items():
        if re.search(pattern, item_name, re.IGNORECASE):
            return brand
    return 'Other'  # Default if no brand matches
In [16]:
# Step 7: Apply the function to create the 'Brand' column with enhanced categorization
df_cleaned['Brand'] = df_cleaned['Item Name'].apply(lambda x: extract_brand_regex(x, brand_patterns))
In [17]:
# Display the first few rows to confirm the changes
df_cleaned.head()
Out[17]:
Item # Item Name Cost Price Profit % Sold On Hand Brand
0 4.799586e+10 American Spirit Blue 10.93 12.87 15.09 24.0 18.0 American Spirit
1 4.799586e+10 American Spirit Gold 11.07 13.46 17.76 1.0 14.0 American Spirit
2 7.161020e+10 BACKWOOD Smooth (1) 0.00 1.69 0.00 2.0 16.0 Backwoods
3 7.161020e+10 BACKWOOD Smooth (5 pk) 0.00 7.99 0.00 2.0 65.0 Backwoods
4 7.161030e+10 BACKWOOD SWEET AROMATIC (1) 0.00 1.59 0.00 20.0 8.0 Backwoods

Categorizing products by type¶

Ex: Cigarette, Cigar, Wrap, Roll. Paper, Chew, Other¶

Notes for later: If no indication... 2 pk, 3 pk, (1), etc. organize into 1 category¶

In [18]:
# Define regex patterns for categorizing "Wrap" products
wrap_patterns = [
    r'\bwrap\b',          # Matches 'wrap'
    r'\bblunt\b',         # Matches 'blunt'
    r'\bwraps\b',         # Matches 'wraps'
    # r'\bleaf\b',          # Matches 'leaf'
    r'\bwrap\s*kit\b',    # Matches 'wrap kit'
    # Add more patterns as needed
]

# Function to categorize "Wrap" products using regex
def categorize_wrap(item_name, patterns):
    for pattern in patterns:
        if re.search(pattern, item_name, re.IGNORECASE):
            return 'Wrap'
    return None  # Return None if no pattern matches

# Apply the function to create the "Type" column for Wrap products
df_cleaned['Type'] = df_cleaned['Item Name'].apply(lambda x: categorize_wrap(x, wrap_patterns))

# Display the first few rows to verify the results
df_cleaned.head()
Out[18]:
Item # Item Name Cost Price Profit % Sold On Hand Brand Type
0 4.799586e+10 American Spirit Blue 10.93 12.87 15.09 24.0 18.0 American Spirit None
1 4.799586e+10 American Spirit Gold 11.07 13.46 17.76 1.0 14.0 American Spirit None
2 7.161020e+10 BACKWOOD Smooth (1) 0.00 1.69 0.00 2.0 16.0 Backwoods None
3 7.161020e+10 BACKWOOD Smooth (5 pk) 0.00 7.99 0.00 2.0 65.0 Backwoods None
4 7.161030e+10 BACKWOOD SWEET AROMATIC (1) 0.00 1.59 0.00 20.0 8.0 Backwoods None
In [19]:
# Dictionaries focusing on non-wrap items
cigarette_brands = {
    'Marlboro': 'Cigarette',
    'Camel': 'Cigarette',
    'Newport': 'Cigarette',
    'American Spirit': 'Cigarette',
    'Pall Mall': 'Cigarette',
    'Winston': 'Cigarette',
    'Lucky Strike': 'Cigarette',
    'Kool': 'Cigarette',
    'Capri': 'Cigarette',
    'Maverick': 'Cigarette',
    'Seneca': 'Cigarette',
    'Crowns': 'Cigarette',
    'Parliament': 'Cigarette'
}

cigar_brands = {
    'Backwoods': 'Cigar',  # Note: Backwoods may also carry wraps, so focus on non-wrap items
    'Swisher': 'Cigar',    # Note: Swisher may also carry wraps, so focus on non-wrap items
    'Dutch': 'Cigar',      # Note: Dutch may also carry wraps, so focus on non-wrap items
    'Black & Mild': 'Cigar',
    'Optimo': 'Cigar',
    'Pom Pom': 'Cigar',
    'BLK Smooth': 'Cigar',
    'Good Time': 'Cigar',  # Note: Good Time may also carry wraps, so focus on non-wrap items
    'Zemis': 'Cigar',
    'Show': 'Cigar',  # Note: Show may also carry wraps, so focus on non-wrap items
    'Crownz': 'Cigar',
    'Fiesta': 'Cigar'
}

chew_brands = {
    'Copenhagen': 'Chew',
    'ZYN': 'Chew',
    'Longhorn': 'Chew',
    'Rogue': 'Chew'
}

roll_paper_brands = {
    'RAW': 'Roll Paper',     # Note: RAW may also carry wraps, so focus on non-wrap items
    'Zig-Zag': 'Roll Paper', # Note: Zig-Zag may also carry wraps, so focus on non-wrap items
    'Job': 'Roll Paper',
    'Red River': 'Roll Paper',
    'HIGH HEMP': 'Wrap'
}

other_brands = {
    'Bugler': 'Other',
    'Fronto Leaf': 'Other',
    'Looseleaf': 'Other',
    'Grabba': 'Other',  # Note: Grabba may also carry wraps, so focus on non-wrap items
    'XXL': 'Other',   # Note: XXL may also carry wraps, so focus on non-wrap items
    'FLM': 'Other',
}
In [20]:
# Function to categorize by dictionary without overwriting existing "Wrap" types
def categorize_by_dict(brand, current_type):
    if current_type == 'Wrap':
        return current_type  # Do not overwrite "Wrap" type
    if brand in cigarette_brands:
        return cigarette_brands[brand]
    elif brand in cigar_brands:
        return cigar_brands[brand]
    elif brand in chew_brands:
        return chew_brands[brand]
    elif brand in roll_paper_brands:
        return roll_paper_brands[brand]
    elif brand in other_brands:
        return other_brands[brand]
    else:
        return 'Other'  # Default to 'Other' if no match is found
In [21]:
# Apply the function to fill in the "Type" column for non-Wrap products
df_cleaned['Type'] = df_cleaned.apply(lambda row: categorize_by_dict(row['Brand'], row['Type']), axis=1)

# Display the first few rows to verify the results
df_cleaned.head()
Out[21]:
Item # Item Name Cost Price Profit % Sold On Hand Brand Type
0 4.799586e+10 American Spirit Blue 10.93 12.87 15.09 24.0 18.0 American Spirit Cigarette
1 4.799586e+10 American Spirit Gold 11.07 13.46 17.76 1.0 14.0 American Spirit Cigarette
2 7.161020e+10 BACKWOOD Smooth (1) 0.00 1.69 0.00 2.0 16.0 Backwoods Cigar
3 7.161020e+10 BACKWOOD Smooth (5 pk) 0.00 7.99 0.00 2.0 65.0 Backwoods Cigar
4 7.161030e+10 BACKWOOD SWEET AROMATIC (1) 0.00 1.59 0.00 20.0 8.0 Backwoods Cigar

Creating Case quantity column.¶

In [22]:
# Updated function to determine case quantity including rules for 'Wrap' type
def determine_case_quantity(item_name, item_type, brand):
    # Default case quantity
    case_quantity = ''

    # Rule for Cigarette type
    if item_type == 'Cigarette':
        return 10

    # Apply rules for Cigar type
    if item_type == 'Cigar':
        # Regex pattern matching for "(1)", "2 pk", "3 pk", etc.
        pattern = r'(\d+)\s*(?:pk|\))'
        match = re.search(pattern, item_name, re.IGNORECASE)
        
        if match:
            num_units = int(match.group(1))
            
            # Rules for specific brands
            if brand in ['Backwoods', 'Zemis']:
                return 8  # All cases for these brands return 8
            
            elif brand == 'Black & Mild':
                if num_units == 5:
                    return 10
                return 25  # Default or num_units == 1
            
            elif brand == 'BLK Smooth':
                if num_units == 2:
                    return 15
                return 25  # Default or num_units == 1

            elif brand == 'Dutch':
                return 15  # Covers all cases for Dutch

            elif brand == 'Good Time':
                if num_units == 3:
                    return 20
                return 15  # Default or num_units == 2

            elif brand in ['Swisher', 'Optimo']:
                return 15  # Covers all cases for these brands
                
            elif brand in ['Pom Pom', 'Show', 'Fiesta']:
                return 15  # Covers all cases for these brands

        # Default case assignment for specific brands when no regex match is found
        if brand == 'Black & Mild':
            return 25
        elif brand in ['Swisher', 'Dutch', 'BLK Smooth', 'Optimo']:
            return 15

    # Apply rules only if the item type is 'Wrap'
    if item_type == 'Wrap':
        # Regex pattern matching for "(1)", "2 pk", "4 pk", etc.
        pattern = r'(\d+)\s*(?:pk|\))'
        match = re.search(pattern, item_name, re.IGNORECASE)

        if match:
            num_units = int(match.group(1))

            # Rules for specific brands
            if brand == 'Backwoods':
                if num_units == 5:
                    return 8

            elif brand == 'Good Time':
                if num_units == 2:
                    return 30
                return 30  # Default if no pattern matched

            elif brand == 'Grabba':
                if num_units == 1:
                    return 8
                if num_units == 2:
                    return 25

            elif brand == 'HIGH HEMP':
                if num_units == 2:
                    return 15
                return 15  # Default if no pattern matched

            elif brand == 'Looseleaf':
                if num_units == 5:
                    return 8
                return 15  # Default if no pattern matched

            elif brand == 'XXL':
                if num_units == 2:
                    return 15

            elif brand == 'Zig-Zag':
                if num_units == 2:
                    return 15
                elif num_units == 4:
                    return 15

        # Default case assignment for specific brands when no regex match is found
        if brand == 'HIGH HEMP':
            return 15

    # If no specific rule matches, return default blank
    return case_quantity
In [23]:
# Apply the updated function to create a new 'Case' column
df_cleaned['Box'] = df_cleaned.apply(lambda row: determine_case_quantity(row['Item Name'], row['Type'], row['Brand']), axis=1)

Come back to fix Roll Paper and Other type cases¶

Calculate case sold and on hand¶

In [24]:
# Convert the 'Sold', 'On Hand', and 'Box' columns to numeric, coercing errors to NaN
df_cleaned['Sold'] = pd.to_numeric(df_cleaned['Sold'], errors='coerce')
df_cleaned['On Hand'] = pd.to_numeric(df_cleaned['On Hand'], errors='coerce')
df_cleaned['Box'] = pd.to_numeric(df_cleaned['Box'], errors='coerce')

# Recalculate the "Sold by Box" and "On Hand by Box" with the corrected data types
df_cleaned['Sold by Box'] = df_cleaned.apply(
    lambda row: f"{int(row['Sold'] // row['Box'])} boxes" + 
    (f" and {int(row['Sold'] % row['Box'])} pieces" if row['Sold'] % row['Box'] > 0 else "")
    if pd.notna(row['Box']) and row['Box'] > 0 else "", axis=1)

df_cleaned['On Hand by Box'] = df_cleaned.apply(
    lambda row: f"{int(row['On Hand'] // row['Box'])} boxes" + 
    (f" and {int(row['On Hand'] % row['Box'])} pieces" if row['On Hand'] % row['Box'] > 0 else "")
    if pd.notna(row['Box']) and row['Box'] > 0 else "", axis=1)

Reorganize column order¶

In [25]:
# Reorganize the columns in the specified order
df_cleaned = df_cleaned[['Item #', 'Item Name', 'Brand', 'Type', 'Box', 'Sold by Box', 'On Hand by Box', 'Sold', 'On Hand']]
In [26]:
# Reorganize the dataframe by sorting it based on the 'Type' column in alphabetical order
sorted_data = df_cleaned.sort_values(by=['Type', 'Item Name'], ascending=[True, True]).reset_index(drop=True)

Save the Dataframe as a CSV file¶

In [27]:
# # Save the final DataFrame to a new CSV file

# final_output_path = '/Users/kvellian/My Drive/DePaul University/Bizi Projects/output_by_case.csv'
# sorted_data.to_csv(final_output_path, index=False)

Saving as an Excel file and Color Coding Reorder Point Conditions¶

In [28]:
def save_sorted_data_with_formatting(df, output_path):
    """
    Saves the provided DataFrame to an Excel file with conditional formatting,
    incorporating "Box" values into reorder point calculations.

    Args:
        df (pd.DataFrame): The DataFrame to save and format.
        output_path (str): The file path to save the Excel file.

    """
    # Convert the final DataFrame to an Excel file
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        worksheet = writer.sheets['Sheet1']

        # Dictionary that maps color names to their hex codes
        color_map = {
            "red": "FF0000",
            "yellow": "FFFF00",
            "green": "00FF00",
            "orange": "FFA500"
        }
        
        # Use the color_map to assign hex values to the PatternFill
        red_fill = PatternFill(start_color=color_map["red"], end_color=color_map["red"], fill_type="solid")
        yellow_fill = PatternFill(start_color=color_map["yellow"], end_color=color_map["yellow"], fill_type="solid")
        green_fill = PatternFill(start_color=color_map["green"], end_color=color_map["green"], fill_type="solid")
        orange_fill = PatternFill(start_color=color_map["orange"], end_color=color_map["orange"], fill_type="solid")


        # Determine the number of columns in the DataFrame
        max_column = len(df.columns)

        # Apply conditional formatting row by row
        for idx, row in df.iterrows():
            sold = row['Sold']
            on_hand = row['On Hand']
            box = row['Box']
            excel_row = idx + 2  # +2 because Excel rows start at 1 and there is a header row

            # Red Condition
            if sold > on_hand or on_hand < box:
                # Change condition if needed
                for cell in worksheet[f'A{excel_row}':f'{chr(64+max_column)}{excel_row}']:
                    for c in cell:
                        c.fill = red_fill
            # Yellow Condition
            elif on_hand <= 1.5 * sold or (box and on_hand <= 1.5 * box and on_hand > box):
                # Change condition if needed
                for cell in worksheet[f'A{excel_row}':f'{chr(64+max_column)}{excel_row}']:
                    for c in cell:
                        c.fill = yellow_fill
            # Orange Condition (New)
            elif (on_hand <= 2 * sold and on_hand > 1.5 * sold) or (box and on_hand <= 2 * box and on_hand > 1.5 * box):
                # Change condition if needed
                for cell in worksheet[f'A{excel_row}':f'{chr(64+max_column)}{excel_row}']:
                    for c in cell:
                        c.fill = orange_fill
            # Green Condition
            elif on_hand > 2 * sold and (box is None or on_hand > 2 * box):
                # Change condition if needed
                for cell in worksheet[f'A{excel_row}':f'{chr(64+max_column)}{excel_row}']:
                    for c in cell:
                        c.fill = green_fill
In [29]:
# Save Excel file
excel_color_coded_path = '/Users/kvellian/My Drive/Bizi Projects/Bizi_Tobacco_List_9.11.24.xlsx'
save_sorted_data_with_formatting(sorted_data, excel_color_coded_path)
In [ ]:
 

Save the CSV file and PDF file¶

In [ ]:
 
In [30]:
# def save_dataframe_to_pdf_adjusted_with_bold_headers(df, pdf_file_path):
#     # Define the maximum number of rows per page
#     max_rows_per_page = 20
    
#     # Create a PDF file to save the table
#     with PdfPages(pdf_file_path) as pdf:
#         for i in range(0, len(df), max_rows_per_page):
#             fig, ax = plt.subplots(figsize=(12, 14))  # Adjust size to fit the page
#             ax.axis('tight')
#             ax.axis('off')
            
#             # Extract the subset of the DataFrame for the current page
#             subset_df = df.iloc[i:i + max_rows_per_page]
            
#             # Create the table with text wrapping and bold headers
#             table = ax.table(cellText=subset_df.values,
#                              colLabels=subset_df.columns,
#                              cellLoc='center',
#                              loc='center',
#                              colWidths=[0.15, 0.35, 0.2, 0.12, 0.1, 0.15, 0.15])  # Adjust the column widths
            
#             # Set properties for better text visibility
#             table.auto_set_font_size(False)
#             table.set_fontsize(7)  # Slightly reduce the font size
#             table.scale(1.2, 1.8)  # Increase scaling for better spacing
            
#             # Bold the header row
#             for key, cell in table.get_celld().items():
#                 cell.set_text_props(wrap=True)
#                 cell.set_height(0.05)  # Adjust row height to prevent overlapping
#                 if key[0] == 0:  # First row is the header
#                     cell.set_fontsize(8)
#                     cell.set_text_props(weight='bold')
                
#             # Save the current page to the PDF
#             pdf.savefig(fig, bbox_inches='tight')
#             plt.close(fig)
In [31]:
# # Save the DataFrame to PDF with bold headers
# pdf_file_path_bold_headers = '/Users/kvellian/My Drive/Bizi Projects/output_by_case.pdf'
# save_dataframe_to_pdf_adjusted_with_bold_headers(sorted_data, pdf_file_path_bold_headers)
In [ ]: