# quorumSubsample 4.3 by John Alroy
# see end of file for version history

# location of input and output data files
$PATH = "./";
# if multiple files are listed, subsampled diversity for each one will
#  be reported both jointly and separately
@FILES = ("Theropoda - 1991.csv");

$TRIALS = 1000;
# options CR, UW, or O2W (default SQS)
$METHOD = "SQS";
# taxonomic rank (required, options genus or species)
$RANK = "genus";
if ( $RANK !~ /^(genus|species)$/ )	{
	print "\nExiting because RANK must be 'species' or 'genus'\n\n";
	exit;
}
# time scale (options stages, subepochs, FR2, Peters, a file with numbers,
#  or a number; default is 11 Myr bins)
$SCALE = "Time bins - stage.txt";
# a number in Ma is required if SCALE is numerical
$MAXMA = "";

if ( $METHOD eq "SQS" || ! $METHOD )	{
	$QUORUM = 0.4;
} elsif ( $METHOD eq "CR" )	{
	$QUOTA = 1000;
} elsif ( $METHOD eq "UW" )	{
	$QUOTA = 100;
} elsif ( $METHOD eq "O2W" )	{
	$QUOTA = 10000;
}

# draw all occurrences and count genera seen at the quorum level(s)
#  (option no, default yes)
$EXACT = "yes";
# subsample abundance data (only works for CR and SQS; default no)
$ABUND = "no";
# draw entire collections (option no, default yes)
$BYCOLLECTION = "yes";
# definition of singletons (option one reference and not recommended,
#  default one occurrence)
$SINGLETONS = "";
# exclude most common (dominant) genus (option yes, default no)
$EXCLUDE = "no";
# exclude taxa in most diverse collection from singleton count
#  (option yes, default no)
$BIGGEST = "no";

# draw all collections in a reference before going to the next one (optional
#  and recommended)
$SEQUENTIAL = "yes";
# fixed number of collections drawn per reference (default no limit,
#  but recommended)
$COLLSPERREF = 3;
# minimum number of references per bin (default none, and not recommended)
$REFQUOTA = 0;
# disperse sampling among references with a throwback algorithm (optional
#  and not recommended)
$DISPERSE = "no";

# assign collections spanning multiple bins to the ones including more than
#  half of their age estimate limits (default no; options yes or a fraction)
$DEORPHAN = "no";
if ( $DEORPHAN > 0 && $DEORPHAN < 0.5 || $DEORPHAN > 1 )	{
	print "\nExiting because DEORPHAN must be 'yes' or a number between 0.5 and 1.0\n\n";
	exit;
}
if ( $DEORPHAN =~ /y/i )	{
	$DEORPHAN = 0.5;
}
# use interpolated boundary estimates in PaleoDB download file (default no,
#  but recommended)
$INTERPOLATE = "";

# print data for bins sometimes under quota (default no)
$USEFAILED = "";
# print data only for bins in the range of those that meet the subsampling
#  target (default no)
$TRIM = "";

# number of collections a bin must include to be analyzed
$MINCOLLBYBIN = 1;
# intervals must include as many collections as the maximum drawn
#  (default no, and not recommended)
$MATCHMAX = "";


if ( $SCALE !~ /[^0-9]/ && $MAXMA == 0 )	{
	print "\nExiting because MAXMA has not been set\n\n";
	exit;
}

if ( $EXACT !~ /n/i && $SINGLETONS =~ /r/i )	{
	print "\nWarning: SINGLETONS is forced to equal 'occurrences' if you choose the EXACT option\n";
}

if ( $EXACT !~ /n/i && $DISPERSE =~ /y/i )	{
	print "\nWarning: DISPERSE doesn't work if you choose the EXACT option\n";
}


my ($nbin,%lookup,@binlist,@base,@top,$numeric);
if ( ! $SCALE )	{
	%BINS = ("Cambrian" => 4, "Ordovician" => 5, "Silurian" => 2, "Devonian" => 5, "Carboniferous" => 5, "Permian" => 4, "Triassic" => 4, "Jurassic" => 6, "Cretaceous" => 8, "Cenozoic" => 6);

	for my $p ( "Cambrian", "Ordovician", "Silurian", "Devonian", "Carboniferous", "Permian", "Triassic", "Jurassic", "Cretaceous", "Cenozoic" )	{
		for my $b ( 1..$BINS{$p} )	{
			$nbin++;
			$lookup{$p." ".$b} = $nbin;
			my $i = $p;
			if ( $i =~ /^C/ )	{
				$i =~ s/Cambrian/Cm/;
				$i =~ s/Carboniferous/C/;
				$i =~ s/Cretaceous/K/;
				$i =~ s/Cenozoic/Cz/;
			} else	{
				$i =~ s/(.).*/$1/;
			}
			$binlist[$nbin] = $i.$b;
		}
	}
} elsif ( $SCALE !~ /stages|epochs|fr2|peters/i && $SCALE =~ /[A-Za-z].*[^0-9]/ )	{
	open IN,"<./$SCALE";
	while (<IN>)	{
		$nbin++;
	}
	close IN;
	# assumes there is no header, columns are tab-delimited, the first
	#  column gives the base of the bin in Ma, and the second gives the
	#  bin name
	my $id = $nbin;
	open IN,"<./$SCALE";
	while (<IN>)	{
		s/\n//;
		my @words = split /\t/,$_;
		$base[$id] = sprintf "%.3f",$words[0];
		$top[$id-1] = $base[$id];
		if ( $words[1] )	{
			$binlist[$id] = $words[1];
		} else	{
			$binlist[$id] = $nbin;
		}
		$id--;
	}
	close IN;
	# rename the scale to allow tests for numeric values later
	$SCALE = "custom";
} elsif ( $SCALE =~ /stages|epochs|fr2|peters/i )	{
	if ( $SCALE =~ /stages/ )	{
		open IN,"<./Gradstein_stages";
	} elsif ( $SCALE =~ /epochs/ )	{
		open IN,"<./Gradstein_subepochs";
	} elsif ( $SCALE =~ /fr2/i )	{
		open IN,"<./FR2_bins";
	} elsif ( $SCALE =~ /peters/ )	{
		open IN,"<./Peters_stages";
	}
	while (<IN>)	{
		s/\n//;
		s/"//g;
		$nbin++;
		$lookup{$_} = $nbin;
		$binlist[$nbin] = $_;
	}
	close IN;
	# intervals are in reverse order
	my @temp = @binlist;
	for my $i ( 0..$#binlist )	{
		$binlist[$i] = $temp[$nbin - $i + 1];
	}
	for my $b ( keys %lookup )	{
		$lookup{$b} = $nbin - $lookup{$b} + 1;
	}
} elsif ( $SCALE > 0 )	{
	$numeric = 1;
	$nbin = int( $MAXMA / $SCALE );
	for my $i ( 1..$nbin )	{
		$base[$nbin - $i + 1] = $i * $SCALE;
		$top[$nbin - $i + 1] = ( $i - 1 ) * $SCALE;
		$binlist[$nbin - $i + 1] = $i;
	}
}

if ( ! @binlist )	{
	print "\nExiting the scale '$SCALE' seems to be misformatted\n\n";
	exit;
}

my (@shortnames,%maxma,%minma,$fref,%incoll,$totalcolls,$lastcoll,$totaloccs,$binnedcolls,%bincolls,%binoccs,%binabund,%bybin,%bybinref,%collfromref,%id,%idname,%allseen,%abund,%raw);
for my $file ( @FILES )	{
	my $shortname = $file;
	$shortname =~ s/\.(txt|csv|tab)//;
	push @shortnames , $shortname;
	if ( ! open IN,"<$PATH/$file" )	{
		print "\nExiting because $file couldn't be found\n\n";
		exit;
	}
	$_ = <IN>;
	s/\n//;
	my @f = split /,/,$_;
	# coll no is column 0
	my ($fgen,$fsp,$fabund,$fbin,$fbin2,$fmax,$fmin,$fextant);
	for my $i ( 0..$#f )	{
		if ( $f[$i] =~ /occurrence(|s).genus_name/ )	{
			$fgen = $i;
		} elsif ( $f[$i] =~ /occurrence(|s).species_reso/ )	{
			$fspreso = $i;
		} elsif ( $f[$i] =~ /occurrence(|s).species_name/ )	{
			$fsp = $i;
		} elsif ( $f[$i] =~ /occurrence(|s).abund_value/ )	{
			$fabund = $i;
		} elsif ( $f[$i] eq "stage" && ( $SCALE =~ /stages/i || $SCALE > 0 ) && $SCALE !~ /fr2|peters/i )	{
			$fbin = $i;
		} elsif ( $f[$i] eq "subepoch" && $SCALE =~ /epochs/i )	{
			$fbin = $i;
		} elsif ( $f[$i] eq "epoch" && $SCALE =~ /epochs/i )	{
			$fbin2 = $i;
		} elsif ( $f[$i] eq "FR2_bin" && $SCALE =~ /fr2/i )	{
			$fbin = $i;
		} elsif ( $f[$i] eq "Peters.interval" && $SCALE =~ /peters/i )	{
			$fbin = $i;
		} elsif ( $f[$i] eq "10_my_bin" && $SCALE !~ /stages|epochs|fr2|peters/i && $SCALE =~ /[^0-9]/ )	{
			$fbin = $i;
		} elsif ( $f[$i] eq "interpolated_base" && $INTERPOLATE =~ /y/i )	{
			$fmax = $i;
		} elsif ( $f[$i] eq "interpolated_top" && $INTERPOLATE =~ /y/i )	{
			$fmin = $i;
		} elsif ( $f[$i] =~ /(interval_base|ma_max|max_ma)$/ && $INTERPOLATE !~ /y/i  )	{
			$fmax = $i;
		} elsif ( $f[$i] =~ /(interval_top|ma_min|min_ma)$/ && $INTERPOLATE !~ /y/i )	{
			$fmin = $i;
		} elsif ( $f[$i] =~ /collection(|s).reference_no/ )	{
			$fref = $i;
		} elsif ( $f[$i] eq $RANK."_extant" )	{
			$fextant = $i;
		}
	}

	if ( $RANK =~ /sp/i && ! $fsp )	{
		print "\nExiting because $file doesn't have a species name field\n\n";
		exit;
	} elsif ( ( $SCALE > 0 || $SCALE eq "custom" ) && ! $fmax )	{
		print "\nExiting because $file doesn't have a maximum estimate field\n\n";
		exit;
	} elsif ( ( $SCALE > 0 || $SCALE eq "custom" ) && ! $fmin )	{
		print "\nExiting because $file doesn't have a minimum age estimate field\n\n";
		exit;
	} elsif ( $SCALE == 0 && $SCALE ne "custom" && ! $fbin )	{
		print "\nExiting because $file doesn't have the right time scale field\n\n";
		exit;
	} elsif ( $fref == 0 && ( $COLLSPERREF > 0 || $REFQUOTA > 0 || $SEQUENTIAL =~ /y/i ) )	{
		print "\nExiting because $file doesn't have a collection reference number field\n\n";
		exit;
	}

	while (<IN>)	{
		s/\n//;
		# a comma followed by a space is assumed to be embedded junk
		$_ =~ s/, / /g;
		my @f = split /,/,$_;
		$f[$fbin] =~ s/"//g;
		my $within = $lookup{$f[$fbin]};
		if ( ! $within && $lookup{$f[$fbin2]} )	{
			$within = $lookup{$f[$fbin2]};
		}
		if ( $SCALE > 0 || $SCALE eq "custom" )	{
			my $range = $f[$fmax] - $f[$fmin];
			$within = "";
			for my $b ( 1..$nbin )	{
				if ( $f[$fmax] eq "" )	{
					last;
				}
				my ($upper,$lower) = ($f[$fmin],$f[$fmax]);
				if ( $top[$b] > $f[$fmin] )	{
					$upper = $top[$b];
				}
				if ( $base[$b] < $f[$fmax] )	{
					$lower = $base[$b];
				}
				if ( $top[$b] <= $f[$fmin] && $base[$b] >= $f[$fmax] )	{
					$within = $b;
					last;
				} elsif ( $DEORPHAN >= 0.5 && $lower - $upper >= $range * $DEORPHAN )	{
					$within = $b;
					last;
				}
			}
			if ( $within ne "" )	{
				if ( $f[$fbin] > 0 && $fbin > 0 )	{
					$stageIn{$within}{$f[$fbin]}++;
				}
			}
		} elsif ( $within && $f[$fmax] > 0 )	{
			if ( $f[$fmax] > $maxma{$within} )	{
				$maxma{$within} = $f[$fmax];
			}
			if ( $f[$fmin] > 0 && ( $f[$fmin] < $minma{$within} || $minma{$within} == 0 ) )	{
				$minma{$within} = $f[$fmin];
			}
		}
		$totaloccs++;
		my $thiscoll = $f[0];
		if ( $thiscoll != $lastcoll )	{
			$totalcolls++;
		}
		# forces by-occurrence sampling
		if ( $BYCOLLECTION =~ /n/i )	{
			$f[0] = $totaloccs;
		}
		if ( $within > 0 && ( $ABUND !~ /y/i || $f[$abund] > 0 ) )	{
			my $name = $f[$fgen];
			if ( $RANK =~ /^s/i )	{
				# knocks out sp. spp. indet. most informals
				if ( $f[$fsp] =~ /[^a-z]/ || $f[$fspreso] =~ /inf/ )	{
					next;
				}
				$name .= " ".$f[$fsp];
			}
			if ( $thiscoll != $lastcoll )	{
				$bincolls{$within}++;
				$binnedcolls++;
			}
			$incoll{$f[0]}++;
			if ( $incoll{$f[0]} == 1 )	{
				push @{$bybin{$within}} , $f[0];
				push @{$bybinref{$within}{$f[$fref]}} , $f[0];
				$collfromref{$f[0]} = $f[$fref];
			}
			if ( ! $id{$name} )	{
				$ngen++;
				$id{$name} = $ngen;
				$idname{$ngen} = $name;
				$infile{$ngen} = $shortname;
			}
			push @{$list{$f[0]}} , $id{$name};
			$binoccs{$within}++;
			if ( $ABUND =~ /y/i )	{
				$abund{$f[0]}{$id{$name}} = $f[$fabund];
				$binabund{$within} += $f[$fabund];
			}
			if ( ! $allseen{$name}{$within} )	{
				$raw{$within}++;
				$allseen{$name}{$within}++;
				if ( ! $first{$name} || $within < $first{$name} )	{
					$first{$name} = $within;
				}
				if ( ! $last{$name} || $within > $last{$name} )	{
					$last{$name} = $within;
				}
				$extant{$name} = $f[$fextant];
			}
		}
		$lastcoll = $thiscoll;
	}
	close IN;
}

print "\n";
if ( ! $fref )	{
	print "Warning: because the file doesn't include reference numbers, Good's u will be\n based on taxa found in one collection instead of one reference\n\n";
}

print "full data set: $totalcolls collections and $totaloccs occurrences\n";
my $temp = 0;
$temp += $binoccs{$_} foreach keys %binoccs;
printf "binned data set: $binnedcolls collections and %d occurrences\n",$temp;

my (%refs,%refseen);
for my $bin ( reverse 1..$nbin )	{
	my @byrefs = keys %{$bybinref{$bin}};
	$refs{$bin} = $#byrefs + 1;
	$refseen{$_}++ foreach keys %{$bybinref{$bin}};
}

if ( $fref > 0 )	{
	my @temp = keys %refseen;
	printf "binned data derive from %d references\n",$#temp+1;
}

my (%rawrt,%rawbc,%rawsingle,%gap,%notgap,%rawone,%rawtwo,%rawthree,%rawpart);
my $totalextant;
for my $g ( keys %first )	{
	if ( $extant{$g} =~ /y/i )	{
		$last{$g} = $nbin + 1;  # bug fix
		$allseen{$g}{$nbin+1}++;  # new line
		$totalextant++;
	}
	if ( $first{$g} == $last{$g} )	{
		$rawsingle{$first{$g}}++;
	}
	for my $bin ( $first{$g}..$last{$g} )	{
		$rawrt{$bin}++;
		if ( $bin != $first{$g} )	{
			$rawbc{$bin}++;
		}
		if ( $bin != $first{$g} && $bin != $last{$g} )	{
			if ( ! $allseen{$g}{$bin} )	{
				$gap{$bin}++;
			} else	{
				$notgap{$bin}++;
			}
		}
		if ( ! $allseen{$g}{$bin-1} && $allseen{$g}{$bin} && ! $allseen{$g}{$bin+1} )	{
			$rawone{$bin}++;
		} elsif ( $allseen{$g}{$bin-1} && $allseen{$g}{$bin} )	{
			$rawtwo{$bin}++;
		}
		if ( $allseen{$g}{$bin-1} && $allseen{$g}{$bin} && $allseen{$g}{$bin+1} )	{
			$rawthree{$bin}++;
		} elsif ( $allseen{$g}{$bin-1} && ! $allseen{$g}{$bin} && $allseen{$g}{$bin+1} )	{
			$rawpart{$bin}++;
		}
		# forward gap fillers
		if ( $allseen{$g}{$bin-1} && $allseen{$g}{$bin} && ! $allseen{$g}{$bin+1} && $allseen{$g}{$bin+2} )	{
			$rawXXOX{$bin}++;
		} elsif ( $allseen{$g}{$bin-1} && ! $allseen{$g}{$bin} && ! $allseen{$g}{$bin+1} && $allseen{$g}{$bin+2} )	{
			$rawXOOX{$bin}++;
		}
		# backward gap fillers
		if ( $allseen{$g}{$bin-2} && ! $allseen{$g}{$bin-1} && $allseen{$g}{$bin} && $allseen{$g}{$bin+1} )	{
			$rawXXOXb{$bin}++;
		} elsif ( $allseen{$g}{$bin-2} && ! $allseen{$g}{$bin-1} && ! $allseen{$g}{$bin} && $allseen{$g}{$bin+1} )	{
			$rawXOOXb{$bin}++;
		}
	}
}
$ranks = ( $RANK =~ /^s/i ) ? "species" : "genera";
if ( $totalextant > 0 )	{
	print "$totalextant of the $ngen binned $ranks are extant\n\n";
} else	{
	print "none of the $ngen binned $ranks are extant\n\n";
}

my (%everdrawn,%everseen,%failed,%lastfailed,%collsused,%collsdrawn,%occsdrawn,%singledrawn,%subone,%subtwo,%maxgdrawn,%freq1,%freq2,%freq3,%freqmax,%ccurve,%atq,%bpdbybin);
$|=1;
print "trials: ";
for my $t ( 1..$TRIALS )	{
	print "\rtrials: $t ";
	my (%seen,%lastused,%usedintrial,%trialcolls,%genera);
	for my $bin ( reverse 1..$nbin )	{
		if ( $bincolls{$bin} >= $MINCOLLBYBIN && ( ! $failed{$bin} || $USEFAILED =~ /y/i ) )	{
			my (@colls,%inref,%drawn,%drawnfromref);
			my %collsinref = ();
			for my $r ( keys %{$bybinref{$bin}} )	{
				my $n = $#{$bybinref{$bin}{$r}} + 1;
				$collsinref{$_} = $n foreach @{$bybinref{$bin}{$r}};
			}
			if ( ( $REFQUOTA > 0 || $SEQUENTIAL =~ /y/i ) && $refs{$bin} >= $REFQUOTA )	{
				my @byrefs = keys %{$bybinref{$bin}};
				my @subrefs = ();
				my $myquota;
				if ( $REFQUOTA > 0 )	{
					$myquota = $REFQUOTA;
				} else	{
					$myquota = $refs{$bin};
				}
				while ( $#subrefs + 1 < $myquota && $#byrefs > -1 )	{
					my $i = int( rand( $#byrefs + 1 ) );
					push @subrefs , $byrefs[$i];
					splice @byrefs , $i , 1;
				}
				for my $s ( @subrefs )	{
					my @temp = @{$bybinref{$bin}{$s}};
					if ( $COLLSPERREF > 0 && scalar(@temp) >= $COLLSPERREF )	{
						for my $i ( 1..$COLLSPERREF )	{
							my $x = int( rand( $#temp + 1 ) );
							push @colls , $temp[$x];
							splice @temp , $x , 1;
						}
					} else	{
						push @colls , @temp;
					}
					for my $c ( @{$bybinref{$bin}{$s}} )	{
						$inref{$_}{$s}++ foreach @{$list{$c}};
					}
				}
			} elsif ( $COLLSPERREF > 0 )	{
				for my $r ( keys %{$bybinref{$bin}} )	{
					my @temp = @{$bybinref{$bin}{$r}};
					if ( $COLLSPERREF > 0 && scalar(@temp) >= $COLLSPERREF )	{
						for my $i ( 1..$COLLSPERREF )	{
							my $x = int( rand( $#temp + 1 ) );
							push @colls , $temp[$x];
							splice @temp , $x , 1;
						}
					} else	{
						push @colls , @temp;
					}
				}
			} else	{
				@colls = @{$bybin{$bin}};
				for my $r ( keys %{$bybinref{$bin}} )	{
					for my $c ( @{$bybinref{$bin}{$r}} )	{
						$inref{$_}{$r}++ foreach @{$list{$c}};
					}
				}
			}

			# these variables need to be recomputed during
			#  every trial because of the REFQUOTA algorithm
			@{$trialcolls{$bin}} = @colls;
			my $nocc = 0;
			my $ncoll = $#colls + 1;
			my %freq = ();

			for my $c ( @colls )	{
				if ( $ABUND =~ /y/i )	{
					$nocc += $abund{$c}{$_} foreach @{$list{$c}};
					$freq{$_} += $abund{$c}{$_} foreach @{$list{$c}};
				} else	{
					$nocc += $#{$list{$c}} + 1;
					$freq{$_}++ foreach @{$list{$c}};
				}
			}
			my ($maxocc,$maxc,$maxg) = (0,0,"");
			for my $g ( keys %freq )	{
				if ( $freq{$g} == 1 )	{
					$freq1{$bin}[$t]++;
				} elsif ( $freq{$g} == 2 )	{
					$freq2{$bin}[$t]++;
				} elsif ( $freq{$g} == 3 )	{
					$freq3{$bin}[$t]++;
				}
			}

			# classical rarefaction of occurrences
			if ( $METHOD eq "CR" )	{
				my @occs = ();
				for my $c ( @colls )	{
					if ( $ABUND =~ /y/i )	{
						for my $g ( @{$list{$c}} )	{
							for my $i ( 1..int($abund{$c}{$g}) )	{
								push @occs , $g;
							}
						}
					} else	{
						push @occs , $_ foreach @{$list{$c}};
					}
				}
				if ( $#occs + 1 >= $QUOTA )	{
					for $q ( 1..$QUOTA )	{
						my $i = int( rand( $#occs + 1 ) );
						my $g = $occs[$i];
						splice @occs , $i , 1;
						$drawn{$g}++;
						$everdrawn{$idname{$g}}{$bin}++;
						$occsdrawn{$bin}[$t]++;
						my @temp = keys %drawn;
						$ccurve{$bin}{$q} += $#temp + 1;
						$atq{$bin}{$q}++;
					}
				} else	{
					$failed{$bin}++;
					$lastfailed{$bin} = $t;
					%drawn = ();
				}
				push @{$genera{$bin}} , $_ foreach keys %drawn;
			# unweighted list subsampling
			} elsif ( $METHOD eq "UW" )	{
				for $q ( 1..$QUOTA )	{
					if ( $#colls == -1 )	{
						$failed{$bin}++;
						$lastfailed{$bin} = $t;
						%drawn = ();
						last;
					}
					my $c = int( rand( $#colls + 1 ) );
					$drawn{$_}++ foreach @{$list{$colls[$c]}};
					$everdrawn{$idname{$_}}{$bin}++ foreach @{$list{$colls[$c]}};
					$collsdrawn{$bin}[$t]++;
					$occsdrawn{$bin}[$t] += $#{$list{$colls[$c]}};
					splice @colls , $c , 1;
					my @temp = keys %drawn;
					$ccurve{$bin}{$q} += $#temp + 1;
					$atq{$bin}{$q}++;
				}
				push @{$genera{$bin}} , $_ foreach keys %drawn;
			# occurrences squared-weighted list subsampling
			} elsif ( $METHOD eq "O2W" )	{
				my $sumo2 = 0;
				while ( $sumo2 < $QUOTA )	{
					if ( $#colls == -1 )	{
						$failed{$bin}++;
						$lastfailed{$bin} = $t;
						%drawn = ();
						last;
					}
					my $c = int( rand( $#colls + 1 ) );
					my $o2 = ( $#{$list{$colls[$c]}} + 1 )**2;
					if ( $o2 + $sumo2 > $QUOTA && $o2 + $sumo2 - $QUOTA > $QUOTA - $sumo2 )	{
						last;
					}
					$drawn{$_}++ foreach @{$list{$colls[$c]}};
					$everdrawn{$idname{$_}}{$bin}++ foreach @{$list{$colls[$c]}};
					$collsdrawn{$bin}[$t]++;
					$occsdrawn{$bin}[$t] += $#{$list{$colls[$c]}};
					$sumo2 += $o2;
					splice @colls , $c , 1;
				}
				push @{$genera{$bin}} , $_ foreach keys %drawn;
			# default is SQS
			} else	{
				# SQS requires at least two collections
				if ( $bincolls{$bin} < 2 )	{
					next;
				}
				my ($notDominant,$singletons,$doubletons,$bpd) = (0,0,0,0);
				my ($maxocc,$maxc,$maxg) = (0,0,"");
				for my $c ( @colls )	{
					if ( $#{$list{$c}} + 1 > $maxocc )	{
						$maxocc = $#{$list{$c}} + 1;
						$maxc = $c;
					}
				}
			# treat singletons appearing in the largest collection
			#  as non-singletons
				my %inmax = ();
				if ( $BIGGEST =~ /y/i )	{
					for my $g ( @{$list{$maxc}} )	{
						$inmax{$g}++;
					}
				}
			# count singletons and doubletons and identify dominant
				delete $freq{''};
				my %thisfreq;
				for my $g ( keys %freq )	{
					$notDominant++;
					my @refs = keys %{$inref{$g}};
					$thisfreq{$g} = $#refs + 1;
					if ( $SINGLETONS !~ /ref/i )	{
						$thisfreq{$g} = $freq{$g};
					}
					if ( $thisfreq{$g} > 1 || $inmax{$g} > 0 )	{
						if ( $thisfreq{$g} == 2 )	{
							$doubletons++;
						}
						if ( $freq{$g} / $nocc > $bpd )	{
							$bpd = $freq{$g} / $nocc;
							$maxg = $g;
						}
					} else	{
						$singletons++;
					}
				}
			# include most common (dominant) taxon
				if ( $EXCLUDE !~ /y/i )	{
					$maxg = "";
				} else	{
					$notDominant--;
					if ( $thisfreq{$maxg} == 2 )	{
						$doubletons--;
					}
				}
				$freqmax{$bin} = $freq{$maxg};
				$bpdbybin{$bin} += $bpd;

			# exact SQS algorithm that requires sampling all
			#  occurrences and counting taxa at quorum level(s)
				if ( $EXACT !~ /n/i )	{
					my (@temp,@pool,%freqnow,%firstseen,%hitat,@drawnfrom,$lastfrom,$collsnow,$occsnow,$maxnow,$allnow,$singlenow,@fromnow,@subnow,$u,$lastu,$occsdrawn);
				# if SEQUENTIAL is true the collections are
				#  already in random order (and clustered by
				#  reference)
				# if not, randomize them here
					if ( $SEQUENTIAL !~ /y/i )	{
						my @temp;
						while ( @colls )	{
							my $x = int( rand( $#colls + 1 ) );
							push @temp, ( splice @colls , $x , 1 );
						}
						@colls = @temp;
					}
				# randomize occurrences within collections
				#  one way or another
					for my $c ( @colls )	{
						my @temp = @{$list{$c}};
						while ( @temp )	{
							my $x = int( rand( $#temp + 1 ) );
							push @pool , splice( @temp , $x , 1 );
							push @drawnfrom , $c;
						}
					}
					while ( @pool )	{
						my $g = shift @pool;
						my $from = shift @drawnfrom;
						if ( $from != $lastfrom )	{
							$lastfrom = $from;
							$collsnow++;
						}
						$freqnow{$g}++;
						$occsnow++;
						if ( $g eq $maxg )	{
							$maxnow++;
						}
						push @{$hitat{$g}} , $occsnow;
						if ( $freqnow{$g} == 1 )	{
							$allnow++;
							if ( ( $inmax{$g} == 0 || $freq{$g} > 1 ) && $g ne $maxg )	{
								$singlenow++;
							}
							$firstseen{$g} = $occsnow;
						} elsif ( $freqnow{$g} == 2 && ( $inmax{$g} == 0 || $freq{$g} > 1 ) && $g ne $maxg )	{
							$singlenow--;
						}
						$u = 0;
						if ( $occsnow > $maxnow )	{
							$u = 1 - $singlenow / ( $occsnow - $maxnow );
						}
						if ( $TRIALS <= 100 )	{
							push @{$uyielding[$bin][$allnow]} , $u;
							$didyield[$bin][$allnow]{$t}++;
						}
						if ( $allnow > $maxyielding )	{
							$maxyielding = $allnow;
						}
				# record current diversity
				# the last draw couldn't have been a singleton,
				#  so diversity couldn't have changed
				# record each sample size at which the quorum
				#  is exceeded
						if ( $u >= $QUORUM && $lastu < $QUORUM )	{
							push @fromnow , $collsnow;
							push @subnow , $occsnow;
						}
						$lastu = $u;
					}
				# identify taxa that were drawn before or at
				#  the median occurrence count yielding a quorum
					if ( @subnow )	{
						my ($i,$j) = ( int ( $#subnow / 2 ),int( ( $#subnow + 1 ) / 2 ) );
						$occsdrawn{$bin}[$t] = ( $subnow[$i] + $subnow[$j] ) / 2;
						$collsdrawn{$bin}[$t] = ( $fromnow[$i] + $fromnow[$j] ) / 2;
						my $o = int( $occsdrawn{$bin}[$t] );
						for my $g ( keys %firstseen )	{
							if ( $firstseen{$g} <= $o )	{
				# compute the number of times each taxon was
				#  drawn in order to identify the dominant in
				#  the subsample 
								my @temp = @{$hitat{$g}};
								while ( $temp[$#temp] > $o )	{
									pop @temp;
								}
								$drawn{$g} = $#temp + 1;
								$everdrawn{$idname{$g}}{$bin} += $#temp + 1;
							}
						}
					} else	{
						$failed{$bin}++;
						$lastfailed{$bin} = $t;
						$maxbybin{$bin} -= $maxsum;
						$bpdbybin{$bin} -= $bpd;
						if ( $USEFAILED =~ /y/i )	{
							%drawn = %freqnow;
						} else	{
							$occsdrawn{$bin}[$t] = -1;
						}
					}
				}

			# inexact SQS algorithm employing estimated frequencies
			# NOTE: Good's u sensu stricto isn't used here at all
			#  because the correction factor is subtracted from the
			#  frequencies instead of multiplied by them, and the
			#  equation's numerator and denominator are both
			#  different; the former involves the doubleton counts
			#  and the latter is the number of genera and not of
			#  occurrences
				else	{
					my $correction;
					if ( $notDominant > 0 )	{
						$correction = ( $singletons + $doubletons / 2 ) / $notDominant;
					} else	{
						$failed{$bin}++;
						$lastfailed{$bin} = $t;
					}
					my %prop = ();
					my $maxsum = 0;
					if ( $nocc > $freq{$maxg} )	{
						$prop{$_} = ( $freq{$_} - $correction ) / ( $nocc - $freq{$maxg} ) foreach keys %freq;
						delete $prop{$maxg};
						$maxsum += $prop{$_} foreach keys %prop;
					}
					$maxbybin{$bin} += $maxsum;

					my ($sumprop,$stopped) = (0,0);
					while ( $sumprop < $QUORUM )	{
						if ( $#colls == -1 || $nocc - $freq{$maxg} < 1 )	{
							$failed{$bin}++;
							$lastfailed{$bin} = $t;
							$maxbybin{$bin} -= $maxsum;
							$bpdbybin{$bin} -= $bpd;
							last;
						}
						my $c = -1;
				# draw collections with a probability inversely
				#  proportional to the number of collections
				#  per reference
						if ( $DISPERSE =~ /y/i && $SEQUENTIAL !~ /y/i )	{
							while ( $c == -1 )	{
								$c = int( rand( $#colls + 1 ) );
								if ( rand() > 1 / $collsinref{$colls[$c]} )	{
									$c = -1;
								}
							}
						}
				# draw all collections in a given reference
				#  before going to the next one
				# the collections were grouped by reference
				#  earlier in the reference randomization
				#  section, so simply draw them in order
						elsif ( $SEQUENTIAL =~ /y/i )	{
							$c = 0;
						}
				# conventional algorithm ignoring references
						else	{
							$c = int( rand( $#colls + 1 ) );
						}
						my $fromcoll = 0;
						for my $g ( @{$list{$colls[$c]}} )	{
							if ( ! $drawn{$g} && $g ne $maxg )	{
								$sumprop += $prop{$g};
								if ( $sumprop >= $QUORUM )	{
									$stopped++;
								}
							}
							$drawn{$g}++;
							$everdrawn{$idname{$g}}{$bin}++;
							$drawnfromref{$g}{$collfromref{$colls[$c]}}++;
					# random throwback algorithm
							if ( rand() > $prop{$g} && $sumprop >= $QUORUM )	{
								$sumprop -= $prop{$g};
								$everdrawn{$idname{$g}}{$bin} -= $drawn{$g};
								delete $drawn{$g};
								delete $drawnfromref{$g};
							} else	{
								if ( $ABUND =~ /y/i )	{
									$occsdrawn{$bin}[$t] += $abund{$colls[$c]}{$g};
								} else	{
									$occsdrawn{$bin}[$t]++;
								}
								$fromcoll++;
							}
							if ( $stopped > 0 )	{
								last;
							}
						}
						if ( $fromcoll > 0 )	{
							$collsdrawn{$bin}[$t]++;
						}
						if ( $stopped > 0 )	{
							last;
						}
						splice @colls , $c , 1;
					}
				}
				for my $g ( keys %drawn )	{
					push @{$genera{$bin}} , $g;
					my @refs = keys %{$drawnfromref{$g}};
					my $thisfreq = $#refs + 1;
					if ( $SINGLETONS !~ /ref/i )	{
						$thisfreq = $drawn{$g};
					}
					if ( $thisfreq == 1 && $g ne $maxg && ( $inmax{$g} == 0 || $freq{$g} > 1 ) )	{
						$singledrawn{$bin}[$t] += $drawn{$g};
					}
					if ( $drawn{$g} == 1 )	{
						$subone{$bin}[$t]++;
					} elsif ( $drawn{$g} == 2 )	{
						$subtwo{$bin}[$t]++;
					}
				}
				$maxgdrawn{$bin}[$t] = $drawn{$maxg};
			}
		}
	}

	# compute tallies for this trial
	my (%first,%last);
	for my $g ( keys %extant )	{
		if ( $extant{$g} =~ /y/i )	{
			$last{$id{$g}} = $nbin + 1;
			$seen{$nbin+1}{$id{$g}}++;
		}
	}
	for my $bin ( reverse 1..$nbin )	{
		if ( $bincolls{$bin} >= $MINCOLLBYBIN && ( $lastfailed{$bin} != $t || ! $failed{$bin} || $USEFAILED =~ /y/i ) )	{
			for my $c ( @{$trialcolls{$bin}} )	{
				$collsused{$bin}++;
				for my $g ( @{$list{$c}} )	{
					if ( $lastused{$g} != $bin )	{
						$genused{$bin}++;
						$usedintrial{$bin}++;
						$lastused{$g} = $bin;
					}
				}
			}
			for my $g ( @{$genera{$bin}} )	{
				if ( $g > 0 && ! $seen{$bin}{$g} )	{
					$sib{$bin}[$t]++;
					$byfile{$bin}{$infile{$g}}++;
					$first{$g} = $bin;
					if ( $last{$g} eq "" )	{
						$last{$g} = $bin;
					}
				}
				if ( $g > 0 && ! $seen{$bin}{$g} && $lastfailed{$bin+1} != $t && $lastfailed{$bin+2} != $t )	{
					if ( $seen{$bin+1}{$g} )	{
						$two{$bin+1}[$t]++;
					}
					if ( $seen{$bin+1}{$g} && $seen{$bin+2}{$g} )	{
						$three{$bin+1}[$t]++;
						$sumthree++;
						$binsumthree{$bin+1}++;
					} elsif ( ! $seen{$bin+1}{$g} && $seen{$bin+2}{$g} )	{
						$part{$bin+1}[$t]++;
						$sumpart++;
						$binsumpart{$bin+1}++;
					}
					# forward gap fillers
					if ( $seen{$bin+1}{$g} && ! $seen{$bin+2}{$g} && $seen{$bin+3}{$g} )	{
						$XXOX{$bin+1}[$t]++;
					} elsif ( ! $seen{$bin+1}{$g} && ! $seen{$bin+2}{$g} && $seen{$bin+3}{$g} )	{
						$XOOX{$bin+1}[$t]++;
					}
					# backward gap fillers
					if ( ! $seen{$bin+1}{$g} && $seen{$bin+2}{$g} && $seen{$bin+3}{$g} )	{
						$XXOXb{$bin+2}[$t]++;
					} elsif ( ! $seen{$bin+1}{$g} && ! $seen{$bin+2}{$g} && $seen{$bin+3}{$g} )	{
						$XOOXb{$bin+2}[$t]++;
					}
				}
				$seen{$bin}{$g}++;
				$everseen{$idname{$g}}{$bin}++;
			}
			for my $g ( keys %{$seen{$bin+1}} )	{
				if ( ! $seen{$bin}{$g} && ! $seen{$bin+2}{$g} )	{
					$one{$bin+1}[$t]++;
				}
			}
		} else	{
			$collsdrawn{$bin}[$t] = -1;
			$occsdrawn{$bin}[$t] = -1;
			$freq1{$bin}[$t] = -1;
			$freq2{$bin}[$t] = -1;
			$freq3{$bin}[$t] = -1;
			$sib{$bin}[$t] = -1;
			$one{$bin+1}[$t] = -1;
			$two{$bin+1}[$t] = -1;
			$three{$bin+1}[$t] = -1;
			$sumthree -= $binsumthree{$bin+1};
			$part{$bin+1}[$t] = -1;
			$sumpart -= $binsumpart{$bin+1};
			($XXOX{$bin+1}[$t],$XOOX{$bin+1}[$t],$XXOXb{$bin+1}[$t],$XOOXb{$bin+1}[$t]) = (-1,-1,-1,-1);
			($rt{$bin}[$t],$bc{$bin}[$t],$single{$bin}[$t]) = (-1,-1,-1);
			$singledrawn{$bin}[$t] = -1;
			$subone{$bin}[$t] = -1;
			$subtwo{$bin}[$t] = -1;
			$maxgdrawn{$bin}[$t] = -1;
		}
		if ( $lastfailed{$bin} == $t )	{
			my %freq;
			for my $c ( @{$bybin{$bin}} )	{
				$freq{$_}++ foreach @{$list{$c}};
			}
		}
	}
	for my $g ( keys %first )	{
		for my $bin ( $first{$g}..$last{$g} )	{
			$rt{$bin}[$t]++;
		}
		if ( $first{$g} == $last{$g} )	{
			$single{$first{$g}}[$t]++;
		} else	{
			for my $bin ( $first{$g}+1..$last{$g} )	{
				$bc{$bin}[$t]++;
			}
		}
	}
}
print "\n";

my ($firstbin,$lastbin) = ($nbin,1);
if ( $TRIM =~ /y/i )	{
	($firstbin,$lastbin) = ("","");
	for $b ( reverse 1..$nbin )	{
		if ( $bincolls{$b} >= $MINCOLLBYBIN && ( $failed{$b} < $TRIALS || $USEFAILED =~ /y/i ) )	{
			if ( ! $firstbin )	{
				$firstbin = $lastbin;
			}
			$lastbin = $b;
		}
	}
	if ( ! $firstbin )	{
		$firstbin = $lastbin;
	}
}

if ( $METHOD =~ /CR|UW/ )	{
	open OUT,">./quorumSubsampleCurves4.txt";
	print OUT "items";
	for $b ( reverse $lastbin..$firstbin )	{
		print OUT "\t$binlist[$b]";
	}
	print OUT "\n";
	for my $q ( 1..$QUOTA )	{
		my ($step,$step2) = (0,1);
		if ( $q > 1 )	{
			$step = int( log( $q - 1 ) / log( 10 ) * 10 );
			$step2 = int( log( $q ) / log( 10 ) * 10 );
		}
		if ( $step < $step2 || $q == $QUOTA )	{
			print OUT "$q";
			for $b ( reverse $lastbin..$firstbin )	{
				if ( $atq{$b}{$q} > 0 )	{
					printf OUT "\t%.1f",$ccurve{$b}{$q}/$atq{$b}{$q};
				} else	{
					print OUT "\tNA";
				}
			}
			print OUT "\n";
		}
	}
	close OUT;
} elsif ( $METHOD eq "SQS" && $EXACT !~ /n/i && $TRIALS <= 100 )	{
	# print median u values for diversity levels that were reached at
	#  least 99% of the time
	open OUT,">./quorumSubsampleCurves4.txt";
	print OUT "diversity";
	for $b ( reverse $lastbin..$firstbin )	{
		print OUT "\t$binlist[$b]";
	}
	print OUT "\n";
	for $d ( 1..$maxyielding )	{
		print OUT $d;
		for $b ( reverse $lastbin..$firstbin )	{
			@us = @{$uyielding[$b][$d]};
			@us = sort { $a <=> $b } @us;
			@did = keys %{$didyield[$b][$d]};
			if ( @us && $#did + 1 >= 0.99 * $TRIALS )	{
				my ($i,$j) = ( int ( $#us / 2 ),int( ( $#us + 1 ) / 2 ) );
				printf OUT "\t%.4f",( $us[$i] + $us[$j] ) / 2;
			} else	{
				print OUT "\tNA";
			}
		}
		print OUT "\n";
	}
	close OUT;
}

my %trials;
for my $bin ( reverse 1..$nbin )	{
	$trials{$bin} = $TRIALS;
	if ( $USEFAILED =~ /y/i )	{
		$trials{$bin} = $TRIALS - $failed{$bin};
	}
}

if ( $MATCHMAX =~ /y/i )	{
	my $maxdrawn;
	for my $bin ( reverse 1..$nbin )	{
		if ( $collsdrawn{$bin} / $trials{$bin} > $maxdrawn )	{
			$maxdrawn = $collsdrawn{$bin} / $trials{$bin};
		}
	}
	for my $bin ( reverse 1..$nbin )	{
		if ( $bincolls{$bin} < $maxdrawn || ( $collsused{$bin} > 0 && $collsused{$bin} / $trials{$bin} < $maxdrawn ) )	{
			$maxbybin{$bin} = "";
			$bpdbybin{$bin} = "";
			$sib{$bin} = "";
			$one{$bin+1} = "";
			$two{$bin+1} = "";
			$three{$bin+1} = "";
			$sumthree -= $binsumthree{$bin+1};
			$part{$bin+1} = "";
			$sumpart -= $binsumpart{$bin+1};
			($XXOX{$bin+1},$XOOX{$bin+1},$XXOXb{$bin+1},$XOOXb{$bin+1}) = ("","","","");
			($rt{$bin+1},$bc{$bin+1},$single{$bin+1}) = ("","","");
			$failed{$bin} = $TRIALS;
		}
	}
}

open OUT,">./quorumSubsample4.txt";
print OUT "Bin name\t";
if ( $SCALE > 0 )	{
	print OUT "Stages\t";
}
if ( $SCALE ne "custom" && $maxma{$b} > 0 )	{
	print OUT "Base\tMidpoint\tDuration\t";
} elsif ( $SCALE eq "custom" )	{
	print OUT "Base\tMidpoint\t";
}
if ( $fref > 0 )	{
	print OUT "References\t";
}
print OUT "Collections\t";
if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
	print OUT "Collections used\t";
}
if ( $METHOD !~ /CR/ )	{
	print OUT "Collections drawn\t";
}
print OUT "Occurrences\t";
if ( $ABUND =~ /y/i )	{
	print OUT "Specimens\t";
	print OUT "Specimens drawn\t";
} else	{
	print OUT "Occurrences drawn\t";
}
if ( $RANK =~ /^s/i )	{
	print OUT "Raw species\t";
} else	{
	print OUT "Raw genera\t";
}
if ( ( $REFQUOTA > 0 || $COLLSPERREF > 0 ) && $RANK =~ /^s/i )	{
	print OUT "Species used\t";
} elsif ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
	print OUT "Genera used\t";
}
print OUT "Raw one timers\tRaw two timers\tRaw three timers\tRaw part timers";
print OUT "\tRaw forward gap fillers\tRaw backward gap fillers";
print OUT "\tRaw range throughs\t\Raw boundary crossers\tRaw single intervals\tGaps";
print OUT "\tGap statistic\tChao-2";
print OUT "\tSubsampled diversity\tThree timer diversity";
if ( $#FILES > 0 )	{
	for my $file ( @shortnames )	{
		print OUT "\t$file";
	}
}
print OUT "\tOne timers\tTwo timers\tThree timers\tPart timers\tThree timer sampling";
print OUT "\tForward gap fillers\tBackward gap fillers";
print OUT "\tRange throughs\tBoundary crossers\tSingle intervals";
print OUT "\tDrawn once";
print OUT "\tSubsampled Chao-2";
print OUT "\tDominance";
print OUT "\t\"Good's u\"";
if ( $METHOD =~ /^(SQS|)$/ )	{
	print OUT "\tMaximum quorum";
	print OUT "\tQuorum drawn";
}
if ( $USEFAILED =~ /y/i )	{
	print OUT "\tFailed trials";
}
print OUT "\n";
my $grandsprob = 1;
if ( $sumthree > 0 )	{
	$grandsprob = $sumthree / ( $sumthree + $sumpart );
	printf "overall average three timer sampling stat: %.3f\n",$grandsprob;
}
my @under;
for $b ( reverse $lastbin..$firstbin )	{
	if ( $SCALE == 0 )	{
		print OUT "\"$binlist[$b]\"";
		if ( $SCALE ne "custom" && $maxma{$b} > 0 )	{
			printf OUT "\t%.2f\t%.2f\t%.2f",$maxma{$b},( $maxma{$b} + $minma{$b} ) / 2,$maxma{$b} - $minma{$b};
		} elsif ( $SCALE eq "custom" )	{
			printf OUT "\t%.2f\t%.2f",$base[$b],( $base[$b] + $top[$b] ) / 2;
		} else	{
			print OUT "\tNA\tNA\tNA";
		}
	} else	{
		printf OUT "bin $b";
		my $stages = ( $stageIn{$b} ) ? '"'.join(', ',sort(keys(%{$stageIn{$b}}))).'"' : "NA";
		printf OUT "\t$stages";
		printf OUT "\t%.2f\t%.2f",$base[$b],( $base[$b] + $top[$b] ) / 2;
	}
	if ( ! $fref )	{
		printf OUT "\t$refs{$b}";
	} elsif ( $refs{$b} > 0 )	{
		printf OUT "\t$refs{$b}";
		print OUT "\t$bincolls{$b}";
	} else	{
		print OUT "\tNA\tNA";
	}
	if ( $notgap{$b} + $gap{$b} > 0 )	{
		$gapstat{$b} = sprintf( "%.3f",$notgap{$b} / ( $notgap{$b} + $gap{$b} ) );
	} else	{
		($gap{$b},$notgap{$b},$gapstat{$b}) = ("NA","NA","NA");
	}
	my $moccsdrawn = mean(\@{$occsdrawn{$b}});
	if ( $bincolls{$b} >= $MINCOLLBYBIN && ( $bincolls{$b} > 1 || $METHOD =~ /CR|UW|O2W/i ) && $moccsdrawn > 0 && $failed{$b} < $TRIALS && ( $failed{$b} == 0 || $USEFAILED =~ /y/i ) )	{
		my $mcollsdrawn;
		if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
			$collsused{$b} /= $trials{$b};
			printf OUT "\t%.2f",$collsused{$b};
		}
		if ( $METHOD !~ /CR/ )	{
			$mcollsdrawn = mean(\@{$collsdrawn{$b}});
			printf OUT "\t%.2f",$mcollsdrawn;
		}
		print OUT "\t$binoccs{$b}";
		if ( $ABUND =~ /y/i )	{
			print OUT "\t$binabund{$b}";
		}
		printf OUT "\t%.2f",$moccsdrawn;
		$genused{$b} /= $trials{$b};
		my $mfreq1 = mean(\@{$freq1{$b}});
		my $mfreq2 = mean(\@{$freq2{$b}});
		my $mfreq3 = mean(\@{$freq3{$b}});
		my $msib = mean(\@{$sib{$b}});
		my $mone = mean(\@{$one{$b}});
		my $mtwo = mean(\@{$two{$b}});
		my $mthree = mean(\@{$three{$b}});
		my $mpart = mean(\@{$part{$b}});
		# raw sampled diversity and genera used
		print OUT "\t$raw{$b}";
		if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
			printf OUT "\t%.2f",$genused{$b};
		}
		printf OUT "\t%d\t%d\t%d\t%d",$rawone{$b},$rawtwo{$b},$rawthree{$b},$rawpart{$b};
		# raw gap fillers
		printf OUT "\t%d",$rawXXOX{$b} + $rawXOOX{$b};
		printf OUT "\t%d",$rawXXOXb{$b} + $rawXOOXb{$b};
		# raw range-through, boundary crossers, single-interval taxa,
		#  and gap taxa
		printf OUT "\t%d\t%d\t%d\t%d",$rawrt{$b},$rawbc{$b},$rawsingle{$b},$gap{$b};
		# raw gap statistic
		print OUT "\t$gapstat{$b}";
		# raw Chao-2 estimator
		if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
			if ( $mfreq2 > 0 )	{
				printf OUT "\t%.2f",$genused{$b} + ( $mfreq1**2 / ( 2 * $mfreq2 ) );
			} else	{
				print OUT "\tNA";
			}
		} else	{
			if ( $mfreq2 > 0 )	{
				printf OUT "\t%.2f",$raw{$b} + ( $mfreq1**2 / ( 2 * $mfreq2 ) );
			} else	{
				print OUT "\tNA";
			}
		}
		# subsampled SIB
		if ( $msib > 0 )	{
			printf OUT "\t%.2f",$msib;
		} else	{
			print OUT "\tNA";
		}
		if ( $mpart > 0 && $msib > 0 )	{
			my $sprob = 1;
			if ( $mthree > 0 && $b != $nbin )	{
				$sprob = $mthree / ( $mthree + $mpart );
			}
		# SIB corrected with sampling stat
			printf OUT "\t%.2f",$msib * $grandsprob / $sprob;
			if ( $#FILES > 0 )	{
				for my $file ( @shortnames )	{
					$byfile{$b}{$file} /= $trials{$b};
					printf OUT "\t%.2f",$byfile{$b}{$file} * $grandsprob / $sprob;
				}
			}
			printf OUT "\t%.2f\t%.2f\t%.2f\t%.2f",$mone,$mtwo,$mthree,$mpart;
			printf OUT "\t%.3f",$sprob;
			printf OUT "\t%.2f",mean(\@{$XXOX{$b}}) + mean(\@{$XOOX{$b}});
			printf OUT "\t%.2f",mean(\@{$XXOXb{$b}}) + mean(\@{$XOOXb{$b}});
			printf OUT "\t%.2f\t%.2f\t%.2f",mean(\@{$rt{$b}}),mean(\@{$bc{$b}}),mean(\@{$single{$b}});
		# subsampled Chao-2 estimator
			printf OUT "\t%.2f",mean(\@{$subone{$b}});
			my ($one,$two) = (mean(\@{$subone{$b}}),mean(\@{$subtwo{$b}}));
			if ( $two > 0 )	{
				printf OUT "\t%.2f",$msib + ( $one**2 / ( 2 * $two ) );
			} else	{
				print OUT "\tNA";
			}

		} else	{
			print OUT "\tNA";
			if ( $#FILES > 0 )	{
				for my $file ( @shortnames )	{
					if ( $msib > 0 )	{
						$byfile{$b}{$file} /= $trials{$b};
						printf OUT "\t%.2f",$byfile{$b}{$file};
					} else	{
						print OUT "\tNA";
					}
				}
			}
			print OUT "\tNA\tNA\tNA\tNA\tNA";
			print OUT "\tNA\tNA\tNA\tNA";
			print OUT "\tNA\tNA\tNA";
		}
		# dominance
		$bpdbybin{$b} /= $trials{$b};
		printf OUT "\t%.4f",$bpdbybin{$b};
		# Good's u (basic equation)
		printf OUT "\t%.3f",1 - $mfreq1 /  ( $binoccs{$b} - $freqmax{$b} );
		# maximum quorum and quorum drawn
		if ( $METHOD =~ /^(SQS|)$/ )	{
			$maxbybin{$b} /= $trials{$b};
			printf OUT "\t%.3f",$maxbybin{$b};
			my $msingle = mean(\@{$singledrawn{$b}});
			my $mmax = mean(\@{$maxgdrawn{$b}});
			if ( $msingle < $moccsdrawn - $mmax )	{
				printf OUT "\t%.3f",1 - $msingle / ( $moccsdrawn - $mmax );
			} else	{
				print OUT "\t0.000";
			}
		}
		if ( $USEFAILED =~ /y/i )	{
			printf OUT "\t%d",$failed{$b};
		}
		print OUT "\n";
	} else	{
		if ( ! $numeric )	{
			push @under , $binlist[$b];
		} else	{
			push @under , $binlist[$b-1];
		}
		# collections used
		if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
			print OUT "\tNA";
		}
		if ( $METHOD !~ /CR/ )	{
			print OUT "\tNA";
		}
		# occurrences
		if ( $binoccs{$b} > 0 )	{
			print OUT "\t$binoccs{$b}";
		} else	{
			print OUT "\tNA";
		}
		if ( $ABUND =~ /y/i && $binabund{$b} > 0 )	{
			print OUT "\t$binabund{$b}";
		} elsif ( $ABUND =~ /y/i )	{
			print OUT "\tNA";
		}
		# occurrences drawn
		print OUT "\tNA";
		# raw sampled diversity and genera used
		if ( $raw{$b} > 0 )	{
			print OUT "\t$raw{$b}";
		} else	{
			print OUT "\tNA";
		}
		if ( $REFQUOTA > 0 || $COLLSPERREF > 0 )	{
			print OUT "\tNA";
		}
		if ( $rawtwo{$b} > 0 )	{
			printf OUT "\t%d\t%d\t%d\t%d",$rawone{$b},$rawtwo{$b},$rawthree{$b},$rawpart{$b};
			printf OUT "\t%d",$rawXXOX{$b} + $rawXOOX{$b};
			printf OUT "\t%d",$rawXXOXb{$b} + $rawXOOXb{$b};
		} else	{
			print OUT "\tNA\tNA\tNA\tNA\tNA\tNA";
		}
		# raw range-through, boundary crossers, single-interval taxa,
		#  and gap taxa
		printf OUT "\t%d\t%d\t%d\t%d",$rawrt{$b},$rawbc{$b},$rawsingle{$b},$gap{$b};
		# raw gap statistic, raw Chao-2, subsampled SIB, and
		#  3T diversity estimate
		print OUT "\tNA\tNA\tNA\tNA";
		if ( $#FILES > 0 )	{
			for my $file ( @shortnames )	{
				print OUT "\tNA";
			}
		}
		# 1T, 2T, 3T, PT, sampling, and gap fillers
		print OUT "\tNA\tNA\tNA\tNA\tNA\tNA\t";
		# RT, BC, single intervals
		print OUT "\tNA\tNA\tNA";
		# drawn once, subsampled Chao-2, dominance
		print OUT "\tNA\tNA\tNA";
		# Good's u, maximum quorum, quorum drawn
		print OUT "\tNA";
		if ( $METHOD =~ /^(SQS|)$/ )	{
			print OUT "\tNA\tNA";
		}
		if ( $USEFAILED =~ /y/i )	{
			print OUT "\tNA";
		}
		print OUT "\n";
	}
}
close OUT;

open OUT,">./quorumSubsampleRanges4.txt";
print OUT "genus";
for $b ( $lastbin..$firstbin )	{
	print OUT "\t$binlist[$b]";
}
print OUT "\n";
my @genera = keys %everseen;
@genera = sort { $a cmp $b } @genera;
for my $g ( @genera )	{
	print OUT $g;
	for $b ( $lastbin..$firstbin )	{
		if ( $everseen{$g}{$b} == 0 )	{
			$n = "0";
		} elsif ( $TRIALS <= 10 )	{
			$n = sprintf "%.2f",$everseen{$g}{$b} / $TRIALS;
		} else	{
			$n = sprintf "%.3f",$everseen{$g}{$b} / $TRIALS;
		}
		print OUT "\t$n";
	}
	print OUT "\n";
}
close OUT;

open OUT,">./quorumSubsampleOccurrences4.txt";
print OUT "genus";
for $b ( $lastbin..$firstbin )	{
	print OUT "\t$binlist[$b]";
}
print OUT "\n";
for my $g ( @genera )	{
	print OUT $g;
	for $b ( $lastbin..$firstbin )	{
		if ( $everdrawn{$g}{$b} == 0 )	{
			$n = "0";
		} elsif ( $TRIALS <= 10 )	{
			$n = sprintf "%.2f",$everdrawn{$g}{$b} / $TRIALS;
		} else	{
			$n = sprintf "%.3f",$everdrawn{$g}{$b} / $TRIALS;
		}
		print OUT "\t$n";
	}
	print OUT "\n";
}
close OUT;

if ( $#under == 0 )	{
	print "$under[0] is under quota\n";
} elsif ( $#under == 1 )	{
	print "$under[0] and $under[1] are under quota\n";
} elsif ( $#under > 0 )	{
	$under[$#under] = "and ".$under[$#under];
	printf "%d intervals are under quota (".join(', ',@under).")\n",$#under + 1;
}

print "\n";

sub mean	{
	my @x = @{$_[0]};
	my $mean;
	for my $i ( 1..$TRIALS )	{
		$mean += $x[$i];
	}
	if ( $mean == 0 )	{
		return 0;
	}
	$mean = $mean / $TRIALS;
	# needed to prevent printf rounding errors
	$mean += 0.0000001;
	return $mean;
}

# version history:
# first draft 19-20.4.09
# version 1.0 release date 20.6.09
# version 2.0 release date 13.9.09
# new features: Pielou's J correction replaced with exclude-most-common-taxon
#  rule; throwback algorithm added; Gradstein stages scale can be used; TRIM
#  option added; outputs more stats, including occurrences drawn and counts of
#  taxa with one, two, or three occurrences; output file column names revised;
#  prints taxa-by-bins output file 
# version 3.0 release date 12.3.10
# new features: coverage now estimated with a version of Good's u that uses
#  counts of taxa found in a single reference instead of taxa with only one
#  occurrence; abundance data and Gradstein subepochs scale can be used;
#  outputs collection and occurrence totals; prints additional totals to screen
# version 3.1 release date 9.7.10
# new features: bug fixes in computation of quorumSubsampleCurves3.txt file
#  and output of two timer counts
# version 3.2 release date 26.7.10 
# new feature: INTERPOLATE option
# version 3.3 release date 11.12.10
# new feature: DISPERSE option
# version 4.0: 19-21.8.11
# new features:
#  improved inexact subsampling algorithm based on sqs version 3.1
#  new EXACT and SEQUENTIAL algorithms
#  BYCOLLECTION, SINGLETONS, EXCLUDE, and BIGGEST made optional
#  means of count variables are geometric instead of arithmetic
#  subsampling curves are printed when EXACT is used
# version 4.1: 3.3.14
# new features:
#  custom time scales can be used
#  multiple updates and bug fixes in time scale and occurrence parsing routines
#  gap filler statistics and lots of additional basic stats are now output
#  switched back to taking arithmetic means because counts can be considered to
#   have been generated by a Poisson process
# version 4.2: 24.6.14
#  a couple of minor bug fixes
# version 4.3: 29.7.14
#  a couple more minor bug fixes