Source code for mpl_bsic.style_excel

from typing import Optional, Union

import pandas as pd
import xlsxwriter

TITLE_FMT = {
    "bold": True,
    "italic": True,
    "align": "center",
    "valign": "center",
    "font_size": 11,
    "font_name": "Gill Sans MT",
}

HEADER_FMT = {
    "font_name": "Gill Sans MT",
    "font_size": 11,
    "italic": True,
    "bold": True,
    "text_wrap": True,
    "valign": "top",
    "align": "center",
    "fg_color": "#38329A",
    "color": "white",
    "top": 1,
    "bottom": 1,
}

BASE_BODY_FMT = {
    "font_name": "Garamond",
    "font_size": 12,
    "text_wrap": True,
    "valign": "top",
    "align": "center",
}

INDEX_FMT = BASE_BODY_FMT | {
    "bold": True,
    "right": 1,
    "left": 1,
    "bottom": 1,
}


def _format_worksheet(
    wb,
    ws,
    df: pd.DataFrame,
    title: Optional[str],
    offset: tuple[int, int],
    write_sources: bool = True,
):
    # check that the workbook and worksheet are valid
    if not isinstance(wb, xlsxwriter.workbook.Workbook):
        raise Exception("Workbook is not an xlsxwriter workbook")

    if not isinstance(ws, xlsxwriter.workbook.Worksheet):
        raise Exception("Worksheet is not an xlsxwriter worksheet")

    title_offset = 0 if title is None else 1

    start_row, start_col = offset
    # +1 to end_row to account for the title
    end_row = df.shape[0] + start_row + title_offset
    end_col = df.shape[1] + start_col

    def _write_index(df: pd.DataFrame):
        def index_format(i):
            fmt = INDEX_FMT | {
                "top": 1 if i == start_row else 0,
                "bottom": 1 if i == end_row else 0,
            }

            return wb.add_format(fmt)

        for row_num, value in enumerate(df.index.values):
            # leave space for the title and headings
            row_index = start_row + row_num + 1 + title_offset
            ws.write(row_index, start_col, value, index_format(row_index))

    def _write_data(df: pd.DataFrame, sources: str):
        def header_format(i):
            fmt = HEADER_FMT | {
                "left": 1 if i == start_col else 0,
                "right": 1 if i == end_col else 0,
            }

            return wb.add_format(fmt)

        def text_format(i: int):
            fmt = BASE_BODY_FMT | {
                "bottom": 1,
                "right": 1 if i == end_col else 0,
            }

            return wb.add_format(fmt)

        # write headers and data
        header_row_index = start_row + title_offset  # leave space for the title
        for i, col_name in enumerate(df.columns):
            col_index = start_col + i + 1  # leave space for the index names

            # write headers
            ws.write(header_row_index, col_index, col_name, header_format(col_index))

            # write data
            data = df[col_name].values
            for j, item in enumerate(data):
                row_index = header_row_index + j + 1
                ws.write(row_index, col_index, item, text_format(col_index))

        # write index name
        ws.write(header_row_index, start_col, df.index.name, header_format(0))

        # write sources
        if write_sources:
            sources_row_idx = end_row + 2
            ws.merge_range(
                sources_row_idx,
                start_col,
                sources_row_idx,
                end_col,
                f"Source: {sources}",
                wb.add_format(BASE_BODY_FMT),
            )
            ws.set_row(sources_row_idx - 1, 5)

    # set the correct row height
    ws.set_default_row(15.5)

    if isinstance(title, str):
        title_format = wb.add_format(TITLE_FMT)
        ws.merge_range(start_row, start_col, start_row, end_col, title, title_format)

    _write_index(df)
    _write_data(df, "BSIC")

    ws.hide_gridlines(2)



[docs]
def df_to_excel(
    df: Union[pd.DataFrame, list[pd.DataFrame]],
    path_to_excel: str,
    title: Optional[Union[str, list[str]]] = None,
    offset: tuple[int, int] = (1, 1),
):
    """Export a Pandas DataFrame as a formatted Excel table.

    Saves the content of the DataFrame to Excel
    and formats the table according to the BSIC Styling Standards.

    The function will also add a title (optional) at the very top of the table and
    a row containing `"Source: BSIC"` at the very bottom, leaving a blank row
    (with reduced height) in between for better aesthetics.

    Parameters
    ----------
    df : Union[pandas.DataFrame, list[pandas.DataFrame]]
        Either a single DataFrame or a list of dataframes
        to be included in the final file.
    path_to_excel : str
        The path for the final excel file, e.g. ``output/fmt_data.xlsx``.
    title : Optional[Union[str, list[str]]], optional
        Titles to be included above the formatted table, by default ``None``.
    offset : tuple[int, int], optional
        The offset to use in the formatted output, by default ``(1, 1)``.
        For example, if offset is set to ``(1,2)``,
        the formatter will leave 2 empty rows and
        1 empty column for better visualization.

    Raises
    ------
    Exception
        If you provide a list of DataFrames and you want to set titles, you must
        provide a title for each DataFrame.
    Exception
        The length of the DataFrame list/array must match the length
        of the titles list/array.
    Exception
        If you only provide a DataFrame (not a list), the title
        must also be a `str`, not a list.

    Warnings
    --------
    The source Excel file must only contain the headings, the index and the data,
    with no blank columns or rows in between them.
    The table has to start at row 0, col 0.

    See Also
    --------
    mpl_bsic.style_excel_file :
        Styles an Excel file. Use if you want to format data
        from a spreadsheet rather than a pandas DataFrame.

    Examples
    --------
    .. code-block:: python

        import pandas as pd
        from mpl_bsic import df_to_excel

        df = pd.read_excel("your_excel_file.xlsx")
        df_to_excel(
            df,
            "output_filename.xlsx",
            "Title of the Table",
            (2,2) # if you want the table to have an offset
        )
    """
    # create a new excel file with the formatted data from the dataframe
    wb = xlsxwriter.Workbook(path_to_excel)
    ws = wb.add_worksheet("output")

    if isinstance(df, list):
        if not isinstance(title, list):
            raise Exception(
                "If you provide a list of dataframes, you must provide a list of titles"
            )
        if not len(df) == len(title):
            raise Exception(
                "If you provide a list of dataframes, you must provide a list of titles of the same length"  # noqa: E501
            )

        ws_col0 = offset[0]

        ws_colend = ws_col0 + sum([_df.shape[1] for _df in df]) + len(df)
        row_source = offset[1] + max([_df.shape[0] for _df in df]) + 4

        for i, (_df, _title) in enumerate(zip(df, title)):
            new_start_col = offset[1] + i * _df.shape[0] + 1
            df_offset = (offset[0], new_start_col)
            _format_worksheet(wb, ws, _df, _title, df_offset, write_sources=False)

            ws.set_column(new_start_col - 1, new_start_col - 1, 3)

        ws.set_row(row_source - 1, 5)
        ws.merge_range(
            row_source,
            ws_col0,
            row_source,
            ws_colend,
            "Source: BSIC",
            wb.add_format(BASE_BODY_FMT),
        )

    else:
        if isinstance(title, list):
            raise Exception(
                "If you provide a single dataframe, you can only provide a single title"
            )

        _format_worksheet(wb, ws, df, title, offset)

    print("closing wb")
    wb.close()




[docs]
def style_excel_file(
    path_to_excel: str,
    sheet_name: str,
    title: Optional[str],
    offset: tuple[int, int] = (1, 1),
):
    """Format an already existing Excel file.

    The function will create a new file, with the same name as the source file
    and the suffix `"_fmt"`, in the same directory.
    It retrieves data and format it according to the BSIC Styling standards.

    The function will also add a title (optional) at the very top of the table and
    a row containing `"Source: BSIC"` at the very bottom, leaving a blank row
    (with reduced height) in between for better aesthetics.

    Parameters
    ----------
    path_to_excel : str
        The path to the Excel file.
    sheet_name : str
        The name of the worksheet containing the data to be formatted.
    title : Optional[str]
        The title to be given to the data. It will be displayed above the table,
        in the same style as the BSIC Headings.
    offset : tuple[int, int], optional
        The offset to use in the formatted output, by default ``(1, 1)``.
        For example, if offset is set to ``(1,2)``,
        the formatter will leave 2 empty rows and
        1 empty column for better visualization.

    Warnings
    --------
    The source Excel file must only contain the headings, the index and the data,
    with no blank columns or rows in between them.
    The table has to start at row 0, col 0.

    See Also
    --------
    mpl_bsic.df_to_excel :
        Exports a pandas DataFrame to an Excel file. Use if your data is in
        a DataFrame rather than on a spreadsheet.

    Examples
    --------
    .. code-block:: python

        from mpl_bsic import style_excel_file

        style_excel_file(
            "excel_file_path",
            "worksheet_name",
            "Title of the Table",
            (2,2) # if you want the table to have an offset
        )
    """
    # format the already existing excel file
    df = pd.read_excel(path_to_excel, sheet_name=sheet_name, index_col=0)
    # add _fmt to not overwrite the source excel file
    fmt_path = path_to_excel.replace(".xlsx", "_fmt.xlsx")
    wb = xlsxwriter.Workbook(fmt_path)
    ws = wb.add_worksheet("output")

    _format_worksheet(wb, ws, df, title, offset)

    wb.close()