??????????????
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 173
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 174
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 175
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 176
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 177
Warning: Cannot modify header information - headers already sent by (output started at /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php:4) in /home/mybf1/public_html/class.bf1.my/wp-includes/js/dist/index.php on line 178
#---------------------------------------------------------------------
package IO::HTML;
#
# Copyright 2012 Christopher J. Madsen
#
# Author: Christopher J. Madsen
# Created: 14 Jan 2012
#
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the
# GNU General Public License or the Artistic License for more details.
#
# ABSTRACT: Open an HTML file with automatic charset detection
#---------------------------------------------------------------------
use 5.008;
use strict;
use warnings;
use Carp 'croak';
use Encode 2.10 qw(decode find_encoding); # need utf-8-strict encoding
use Exporter 5.57 'import';
our $VERSION = '1.00';
# This file is part of IO-HTML 1.00 (February 23, 2013)
our $default_encoding ||= 'cp1252';
our @EXPORT = qw(html_file);
our @EXPORT_OK = qw(find_charset_in html_file_and_encoding html_outfile
sniff_encoding);
our %EXPORT_TAGS = (
rw => [qw( html_file html_file_and_encoding html_outfile )],
all => [ @EXPORT, @EXPORT_OK ],
);
#=====================================================================
sub html_file
{
(&html_file_and_encoding)[0]; # return just the filehandle
} # end html_file
# Note: I made html_file and html_file_and_encoding separate functions
# (instead of making html_file context-sensitive) because I wanted to
# use html_file in function calls (i.e. list context) without having
# to write "scalar html_file" all the time.
sub html_file_and_encoding
{
my ($filename, $options) = @_;
$options ||= {};
open(my $in, '<:raw', $filename) or croak "Failed to open $filename: $!";
my ($encoding, $bom) = sniff_encoding($in, $filename, $options);
if (not defined $encoding) {
croak "No default encoding specified"
unless defined($encoding = $default_encoding);
$encoding = find_encoding($encoding) if $options->{encoding};
} # end if we didn't find an encoding
binmode $in, sprintf(":encoding(%s):crlf",
$options->{encoding} ? $encoding->name : $encoding);
return ($in, $encoding, $bom);
} # end html_file_and_encoding
#---------------------------------------------------------------------
sub html_outfile
{
my ($filename, $encoding, $bom) = @_;
if (not defined $encoding) {
croak "No default encoding specified"
unless defined($encoding = $default_encoding);
} # end if we didn't find an encoding
elsif (ref $encoding) {
$encoding = $encoding->name;
}
open(my $out, ">:encoding($encoding)", $filename)
or croak "Failed to open $filename: $!";
print $out "\x{FeFF}" if $bom;
return $out;
} # end html_outfile
#---------------------------------------------------------------------
sub sniff_encoding
{
my ($in, $filename, $options) = @_;
$filename = 'file' unless defined $filename;
$options ||= {};
my $pos = tell $in;
croak "Could not seek $filename: $!" if $pos < 0;
croak "Could not read $filename: $!" unless defined read $in, my $buf, 1024;
seek $in, $pos, 0 or croak "Could not seek $filename: $!";
# Check for BOM:
my $bom;
my $encoding = do {
if ($buf =~ /^\xFe\xFF/) {
$bom = 2;
'UTF-16BE';
} elsif ($buf =~ /^\xFF\xFe/) {
$bom = 2;
'UTF-16LE';
} elsif ($buf =~ /^\xEF\xBB\xBF/) {
$bom = 3;
'utf-8-strict';
} else {
find_charset_in($buf, $options); # check for
}
}; # end $encoding
if ($bom) {
seek $in, $bom, 1 or croak "Could not seek $filename: $!";
$bom = 1;
}
elsif (not defined $encoding) { # try decoding as UTF-8
my $test = decode('utf-8-strict', $buf, Encode::FB_QUIET);
if ($buf =~ /^(?: # nothing left over
| [\xC2-\xDF] # incomplete 2-byte char
| [\xE0-\xEF] [\x80-\xBF]? # incomplete 3-byte char
| [\xF0-\xF4] [\x80-\xBF]{0,2} # incomplete 4-byte char
)\z/x and $test =~ /[^\x00-\x7F]/) {
$encoding = 'utf-8-strict';
} # end if valid UTF-8 with at least one multi-byte character:
} # end if testing for UTF-8
if (defined $encoding and $options->{encoding} and not ref $encoding) {
$encoding = find_encoding($encoding);
} # end if $encoding is a string and we want an object
return wantarray ? ($encoding, $bom) : $encoding;
} # end sniff_encoding
#=====================================================================
# Based on HTML5 8.2.2.1 Determining the character encoding:
# Get attribute from current position of $_
sub _get_attribute
{
m!\G[\x09\x0A\x0C\x0D /]+!gc; # skip whitespace or /
return if /\G>/gc or not /\G(=?[^\x09\x0A\x0C\x0D =]*)/gc;
my ($name, $value) = (lc $1, '');
if (/\G[\x09\x0A\x0C\x0D ]*=[\x09\x0A\x0C\x0D ]*/gc
and (/\G"([^"]*)"?/gc or
/\G'([^']*)'?/gc or
/\G([^\x09\x0A\x0C\x0D >]*)/gc)) {
$value = lc $1;
} # end if attribute has value
return wantarray ? ($name, $value) : 1;
} # end _get_attribute
# Examine a meta value for a charset:
sub _get_charset_from_meta
{
for (shift) {
while (/charset[\x09\x0A\x0C\x0D ]*=[\x09\x0A\x0C\x0D ]*/ig) {
return $1 if (/\G"([^"]*)"/gc or
/\G'([^']*)'/gc or
/\G(?!['"])([^\x09\x0A\x0C\x0D ;]+)/gc);
}
} # end for value
return undef;
} # end _get_charset_from_meta
#---------------------------------------------------------------------
sub find_charset_in
{
for (shift) {
my $options = shift || {};
my $stop = length > 1024 ? 1024 : length; # search first 1024 bytes
my $expect_pragma = (defined $options->{need_pragma}
? $options->{need_pragma} : 1);
pos() = 0;
while (pos() < $stop) {
if (/\G