AnonSec Team
Server IP : 103.11.96.170  /  Your IP : 3.145.170.67
Web Server : Microsoft-IIS/10.0
System : Windows NT WIN-F6SLGVICLOP 10.0 build 17763 (Windows Server 2016) AMD64
User : elibrary.unsap.ac.id ( 0)
PHP Version : 7.4.19
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : OFF  |  Perl : OFF  |  Python : OFF
Directory (0777) :  D:/localhost/elibrary/repository/font/../../simbio2/simbio_UTILS/

[  Home  ][  C0mmand  ][  Upload File  ]

Current File : D:/localhost/elibrary/repository/font/../../simbio2/simbio_UTILS/simbio_tokenizecql.v2.inc.php
<?php
/**
 *
 * CQL Tokenizer
 *
 * Copyright (C) 2009 Arie Nugraha (dicarve@yahoo.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

// refuse to run unless the including script has defined INDEX_AUTH as 1;
// the second operand is only evaluated once the constant is known to exist
if (!defined('INDEX_AUTH') || INDEX_AUTH != 1) {
    die("can not access this file directly");
}

/**
 * CQL Tokenizer
 * Tokenize a CQL-like query string into an array for easy processing
 *
 * The query is lower-cased and split on spaces, tabs and newlines. Words are
 * collected per field (the field is 'title' until a "field=" prefix is met) and
 * emitted as a flat list of entries:
 *  - array('f' => 'boolean', 'b' => $flag) is emitted every time a "field=" prefix
 *    is met after at least one word has been processed; $flag is '+' by default
 *    or after "and", '*' after "or", '-' after "not" and '++' after "exact"
 *  - array('f' => $field, 'b' => $flag, 'q' => $terms) holds the words collected
 *    for a field; quoted phrases are kept wrapped in double quotes inside 'q'
 *  - array('f' => 'cql_end') always terminates the list
 *
 * A dummy "xxxxx=xxxxx" term is appended internally to flush the last buffer, so
 * an entry for the 'xxxxx' field can show up right before 'cql_end' (for example
 * for an empty query); callers are expected to ignore that field.
 *
 * NOTE: a quote that is never closed swallows the remainder of the query,
 * including the dummy term, so the words pending at that point are not emitted.
 *
 * @param   string  $str_query              the raw query string
 * @param   array   $arr_searcheable_fields field names recognised as "field=" prefixes
 * @param   array   $arr_stop_words         words to skip when met outside quotes
 * @param   integer $int_max_words          maximum number of words (a quoted phrase
 *                                          counts as one) kept from the query; the
 *                                          rest of the query is discarded
 * @return  array
 **/
function simbio_tokenizeCQL($str_query, $arr_searcheable_fields, $arr_stop_words = array(), $int_max_words = 20)
{
    // buffer vars
    $_array_queries = array();
    $_last_boolean = '+';
    $_previous_field = 'title';
    $_current_field = 'title';
    $_phrase = '';
    $_q_buff = '';
    // dummy term used to flush the last buffered words
    $_sentinel = 'xxxxx=xxxxx';
    // add dummy field
    $arr_searcheable_fields[] = 'xxxxx';
    // make sure there are no spaces around the equation mark
    $str_query = preg_replace('@\s+=\s+@i', '=', $str_query);
    // add dummy field at the end of the query
    $str_query .= ' '.$_sentinel;
    // inside quote flag
    $_inside_quote = false;
    // set once the word limit is hit and the dummy term has been forced in
    $_limit_reached = false;
    // tokenizing string one by one
    $_token = strtok(strtolower($str_query), " \n\t");
    // word counter
    $_word_count = 0;
    while ($_token !== false) {
        $_token = trim($_token);
        // check for stopwords
        if (in_array($_token, $arr_stop_words) AND !$_inside_quote) {
            // do nothing
            $_token = strtok(" \n\t");
            continue;
        }
        // check boolean mode; inside quotes these words belong to the phrase
        if (!$_inside_quote AND in_array($_token, array('exact', 'and', 'or', 'not'), true)) {
            if ($_token == 'exact') {
                $_last_boolean = '++';
            } else if ($_token == 'or') {
                $_last_boolean = '*';
            } else if ($_token == 'not') {
                $_last_boolean = '-';
            } else {
                $_last_boolean = '+';
            }
            // we continue to the next loop
            $_token = strtok(" \n\t");
            continue;
        }
        // SAFEGUARD!
        // once the limit is reached, process the dummy term in place of the next
        // word so the words collected so far are flushed instead of being lost
        if ($_word_count >= $int_max_words) {
            $_limit_reached = true;
            $_token = $_sentinel;
        }
        // check for current field
        if (!$_inside_quote) {
            foreach ($arr_searcheable_fields as $_search_field) {
                $_prefix = $_search_field.'=';
                if (strpos($_token, $_prefix) === 0) {
                    $_current_field = $_search_field;
                    // strip only the leading prefix, the value may contain it again
                    $_token = (string) substr($_token, strlen($_prefix));
                    if ($_word_count > 0) {
                        $_array_queries[] = array('f' => 'boolean', 'b' => $_last_boolean);
                    }
                    $_last_boolean = '+';
                    // first matching prefix wins
                    break;
                }
            }
        }
        // check if we are inside quotes
        if (strpos($_token, '"') === 0) {
            $_inside_quote = true;
            // remove the first quote
            $_token = substr_replace($_token, '', 0, 1);
        }
        if ($_inside_quote) {
            if (strpos($_token, '"') === strlen($_token)-1) {
                // closing quote found at the end of the token
                $_inside_quote = false;
                $_phrase .= str_replace('"', '', $_token);
                // the phrase opens a new field: emit the words still buffered
                // for the previous field before buffering the phrase
                if (($_current_field != $_previous_field) && !empty($_q_buff)) {
                    $_array_queries[] = array('f' => $_previous_field, 'b' => $_last_boolean, 'q' => trim($_q_buff));
                    $_q_buff = '';
                }
                $_q_buff .= "\"$_phrase\" ";
                // reset
                $_phrase = '';
            } else {
                $_phrase .= str_replace('"', '', $_token).' ';
                // we continue to the next loop and concatenating words
                $_token = strtok(" \n\t");
                continue;
            }
        } else {
            if ($_current_field == $_previous_field) {
                $_q_buff .= "$_token ";
            } else if (($_current_field != $_previous_field) && !empty($_q_buff)) {
                $_array_queries[] = array('f' => $_previous_field, 'b' => $_last_boolean, 'q' => trim($_q_buff));
                $_q_buff = "$_token ";
            } else {
                $_array_queries[] = array('f' => $_current_field, 'b' => $_last_boolean, 'q' => trim($_token));
                $_q_buff = '';
            }
        }
        // set previous field flag
        $_previous_field = $_current_field;
        // the dummy term has flushed the buffers, ignore the rest of the query
        if ($_limit_reached) {
            break;
        }
        // re-toke
        $_token = strtok(" \n\t");
        // add word counter
        $_word_count++;
    }
    $_array_queries[] = array('f' => 'cql_end');
    return $_array_queries;
}

AnonSec - 2021