def SBU(df, target_column, percentage):
    """
    Thin out the long runs of zeros in a DataFrame column.

    A "streak" is a maximal run of consecutive rows whose target value equals 0.
    Every streak that is strictly longer than the average streak length loses a
    share of its rows; the removed rows are spread evenly over the streak
    (first and last row of the streak included when two or more are removed).

    Parameters:
    df (pd.DataFrame): DataFrame with the data. Rows are addressed by position,
        so the index labels do not matter.
    target_column (str): The name of the target column with the values.
    percentage (float): Share of zeros to remove from each long streak, given as
        a fraction (0.25 removes a quarter of the streak). The number of rows
        removed is ``length * percentage`` rounded down; values above 1 remove
        the whole streak.

    Returns:
    pd.DataFrame: A new DataFrame with the selected rows removed. The input
        DataFrame is never returned itself, even when nothing is removed.
    """
    values = df[target_column].values

    # Collect (start position, length) for every run of consecutive zeros.
    streaks = []
    run_length = 0
    for position, value in enumerate(values):
        if value == 0:
            run_length += 1
        elif run_length:
            streaks.append((position - run_length, run_length))
            run_length = 0

    # A run that reaches the end of the column is still open at this point.
    if run_length:
        streaks.append((len(values) - run_length, run_length))

    if not streaks:
        # Return a copy so the result never aliases the caller's frame,
        # consistent with the filtered result returned below.
        return df.copy()

    average_length = sum(length for _, length in streaks) / len(streaks)

    keep = np.ones(len(df), dtype=bool)
    for start, length in streaks:
        if length <= average_length:
            continue
        # The small epsilon absorbs binary floating-point noise such as
        # 100 * 0.29 == 28.999999999999996, which would otherwise truncate to 28.
        # Capping at the streak length avoids generating duplicate positions
        # when percentage > 1 (the set of removed rows is the same).
        num_to_remove = min(int(length * percentage + 1e-9), length)
        # Evenly spaced positions inside the streak; dtype=int floors them.
        drop = np.linspace(start, start + length - 1, num_to_remove, dtype=int)
        keep[drop] = False

    return df[keep]

def analyze_target_variable(df, target_variable):
    """
    Plot the distribution of one column and print summary counts for it.

    Shows a 50-bin histogram and a KDE plot of the column, then prints: the
    number of non-null values, the number of zeros, how many values exceed each
    of a fixed list of thresholds, the proportions of zero and non-zero values,
    and how many values fall in (0, r] for a fixed list of upper bounds r.

    Parameters:
    df (pd.DataFrame): DataFrame with the data.
    target_variable (str): The name of the column to analyze.

    Returns:
    None. The function only draws plots and prints to standard output.
    """
    series = df[target_variable]

    # Histogram of the raw values
    plt.hist(series, alpha=0.7, edgecolor='black', bins=50)
    plt.xlabel(target_variable)
    plt.ylabel('Frequency')
    plt.show()

    # Kernel density estimate of the same values
    sns.kdeplot(series)
    plt.show()

    # Occurrences per distinct value, ordered by value
    counts_by_value = series.value_counts().sort_index()
    n_total = series.count()
    print(f"Total values = {n_total}")

    if 0 in counts_by_value.index:
        n_zero = counts_by_value.loc[0]
    else:
        n_zero = 0
    print(f"Total values equal to 0 = {n_zero}")

    for threshold in (0, 1, 2, 4, 8, 16, 32):
        above_threshold = counts_by_value.index > threshold
        count = counts_by_value.loc[above_threshold].sum()
        print(f"Values greater than {threshold} = {count}")

    # "Positive" here is everything that is not zero (total minus zeros).
    n_positive = n_total - n_zero
    proportion_zero = n_zero / n_total
    proportion_positive = n_positive / n_total

    print(f"Proportion of values equal to 0: {proportion_zero:.2f}")
    print(f"Proportion of values greater than 0: {proportion_positive:.2f}")

    # Upper bounds are reported in this fixed (unsorted) order.
    for r in (0.1, 0.5, 1, 0.05, 0.025):
        in_range = (series > 0) & (series <= r)
        values_between_a_and_b = series[in_range].count()
        print(f"Total values greater than 0 but less than or equal to {r} = {values_between_a_and_b}")