BERT in Keras with TensorFlow Hub
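The snippets below pick up from the previous post and assume its imports, constants, and TensorFlow session are already in place. As a reminder, that setup looks roughly like the following sketch; the exact hub module URL, the `max_seq_length` value, and the session handling are assumptions drawn from the accompanying notebook rather than part of this excerpt.

import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import backend as K

# Assumed configuration -- adjust to match the earlier post / notebook.
bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
max_seq_length = 256  # reviews are truncated to this many whitespace tokens

# The notebook keeps an explicit TF1 session (and runs the global, local and
# table initializers on it before training).
sess = tf.Session()
K.set_session(sess)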

First, we load the same IMDB data we used previously:

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
    data = {}
    data["sentence"] = []
    data["sentiment"] = []
    for file_path in os.listdir(directory):
        with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
            data["sentence"].append(f.read())
            data["sentiment"].append(re.match(r"\d+_(\d+)\.txt", file_path).group(1))
    return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
    pos_df = load_directory_data(os.path.join(directory, "pos"))
    neg_df = load_directory_data(os.path.join(directory, "neg"))
    pos_df["polarity"] = 1
    neg_df["polarity"] = 0
    return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets(force_download=False):
    dataset = tf.keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True)

    train_df = load_dataset(os.path.join(os.path.dirname(dataset), "aclImdb", "train"))
    test_df = load_dataset(os.path.join(os.path.dirname(dataset), "aclImdb", "test"))

    return train_df, test_df

# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)

train_df, test_df = download_and_load_datasets()

# Create datasets (only take up to `max_seq_length` words for memory).
train_text = train_df['sentence'].tolist()
train_text = [' '.join(t.split()[0:max_seq_length]) for t in train_text]
train_text = np.array(train_text, dtype=object)[:, np.newaxis]
train_label = train_df['polarity'].tolist()

test_text = test_df['sentence'].tolist()
test_text = [' '.join(t.split()[0:max_seq_length]) for t in test_text]
test_text = np.array(test_text, dtype=object)[:, np.newaxis]
test_label = test_df['polarity'].tolist()
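As a quick sanity check (not part of the original post), you can peek at what the loader produced; the column names follow directly from the functions above:

print(train_df.shape)                       # (25000, 3) for the full IMDB train split
print(train_df.columns.tolist())            # ['sentence', 'sentiment', 'polarity']
print(train_df['polarity'].value_counts())  # balanced positive/negative labels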

Next, we tokenize the data using the tf-hub model, which simplifies preprocessing:

# Instantiate tokenizer
tokenizer = create_tokenizer_from_hub_module()

# Convert data to InputExample format
train_examples = convert_text_to_examples(train_text, train_label)
test_examples = convert_text_to_examples(test_text, test_label)

# Convert to features
(train_input_ids, train_input_masks, train_segment_ids, train_labels
 ) = convert_examples_to_features(tokenizer, train_examples, max_seq_length=max_seq_length)
(test_input_ids, test_input_masks, test_segment_ids, test_labels
 ) = convert_examples_to_features(tokenizer, test_examples, max_seq_length=max_seq_length)
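The helper functions here (`create_tokenizer_from_hub_module`, `convert_text_to_examples`, `convert_examples_to_features`) come from the accompanying notebook: they wrap each sentence and label in a BERT InputExample and then pad/convert it to token ids, an input mask, and segment ids. The tokenizer itself is built from the vocabulary shipped inside the TF Hub module, roughly along the lines of this sketch (details may differ slightly from the notebook):

from bert.tokenization import FullTokenizer  # pip package: bert-tensorflow

def create_tokenizer_from_hub_module():
    """Build a BERT FullTokenizer from the vocab stored in the TF Hub module."""
    bert_module = hub.Module(bert_path)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    vocab_file, do_lower_case = sess.run(
        [tokenization_info["vocab_file"], tokenization_info["do_lower_case"]]
    )
    return FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)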

We next build a custom Keras layer that integrates BERT from tf-hub. The model is very large (110,302,011 parameters!!!), so we only fine-tune a subset of its layers.

class BertLayer(tf.layers.Layer):
    def __init__(self, n_fine_tune_layers=10, **kwargs):
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        self.output_size = 768
        super(BertLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.bert = hub.Module(
            bert_path,
            trainable=self.trainable,
            name="{}_module".format(self.name)
        )

        trainable_vars = self.bert.variables

        # Remove unused layers
        trainable_vars = [var for var in trainable_vars if "/cls/" not in var.name]

        # Select how many layers to fine tune
        trainable_vars = trainable_vars[-self.n_fine_tune_layers:]

        # Add to trainable weights
        for var in trainable_vars:
            self._trainable_weights.append(var)

        for var in self.bert.variables:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )
        result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
            "pooled_output"
        ]
        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_size)
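To see what "a subset of layers" means in practice, a quick hypothetical check (not in the original notebook) is to build the layer once and count how many BERT variables end up trainable versus frozen:

# Hypothetical sanity check: how many BERT variables does a given
# n_fine_tune_layers value leave trainable vs. frozen?
check_layer = BertLayer(n_fine_tune_layers=10)
check_layer.build(input_shape=None)  # build() only needs the hub module, not the shape

print("trainable BERT variables:", len(check_layer.trainable_weights))
print("frozen BERT variables:", len(check_layer.non_trainable_weights))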

Now, we can easily build and train our model using the BERT layer:

# Build model
in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
bert_inputs = [in_id, in_mask, in_segment]

bert_output = BertLayer(n_fine_tune_layers=10)(bert_inputs)

dense = tf.keras.layers.Dense(256, activation='relu')(bert_output)
pred = tf.keras.layers.Dense(1, activation='sigmoid')(dense)

model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(
    [train_input_ids, train_input_masks, train_segment_ids],
    train_labels,
    validation_data=([test_input_ids, test_input_masks, test_segment_ids], test_labels),
    epochs=1,
    batch_size=32
)
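Once training finishes, the same preprocessing pipeline can be reused for inference. Here is a minimal sketch (not from the original post) of scoring new sentences with the fitted model; it assumes the tokenizer and conversion helpers defined above, and the example sentences are made up:

# Score a few new sentences with the trained model (illustrative only).
new_text = ["This movie was an absolute delight from start to finish.",
            "A dull, predictable mess. I want those two hours back."]
new_text = [' '.join(t.split()[0:max_seq_length]) for t in new_text]
new_text = np.array(new_text, dtype=object)[:, np.newaxis]
new_labels = [0, 0]  # dummy labels; the conversion helpers expect them

new_examples = convert_text_to_examples(new_text, new_labels)
(new_input_ids, new_input_masks, new_segment_ids, _
 ) = convert_examples_to_features(tokenizer, new_examples, max_seq_length=max_seq_length)

probs = model.predict([new_input_ids, new_input_masks, new_segment_ids])
print(probs)  # predicted probability of positive sentiment per sentence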

Using a GPU for large models like BERT is advised! Pretty easy! See the full notebook on GitHub and build cool stuff!
