Ocr llama API Library #1

Open
opened 2025-02-17 11:12:57 +07:00 by sas.padmanto · 2 comments

Model : llama3.2-vision

library :
application/libraries/Ocr_llama.php

<?php

defined('BASEPATH') or exit('No direct script access allowed');
/**
 * OCR helper that sends a grayscale JPEG copy of an image to an Ollama
 * chat endpoint and parses the model's "label:value" answer into an array.
 *
 * Every public call returns an array with a "status" key:
 *   - ["status" => "OK",  "data" => array, "info" => array] on success
 *   - ["status" => "ERR", "message" => string]              on failure
 */
class Ocr_llama
{
  private $ollamaUrl = "http://10.9.10.205:11434/api/chat";
  private $modelName = "llama3.2-vision:11b";
  private $temperature = 0.1;

  /**
   * Run OCR on a single image file.
   *
   * @param string $imagePath Path to the image file on the local filesystem.
   * @return array "OK" result with parsed "data" and timing "info", or an
   *               "ERR" result with a "message" describing the failure.
   */
  public function extract_ocr($imagePath)
  {
    // is_file() also rejects directories, which file_exists() would let through.
    if (!is_file($imagePath)) {
      return ["status"=>"ERR", "message"=> "Error: File not found."];
    }

    $rawImage = file_get_contents($imagePath);
    if ($rawImage === false) {
      return ["status" => "ERR", "message" => "Error: Unable to read file."];
    }

    // imagecreatefromstring() returns false for unsupported or corrupt data;
    // bail out instead of passing false on to imagefilter()/imagejpeg().
    $image = imagecreatefromstring($rawImage);
    if ($image === false) {
      return ["status" => "ERR", "message" => "Error: Unsupported or corrupt image."];
    }
    imagefilter($image, IMG_FILTER_GRAYSCALE);

    // imagejpeg() writes to the output stream, so capture it with a buffer.
    ob_start();
    $encoded = imagejpeg($image);
    $imageData = ob_get_clean();
    if (!$encoded || $imageData === false || $imageData === "") {
      return ["status" => "ERR", "message" => "Error: Unable to encode image."];
    }
    $imgStr = base64_encode($imageData);

    // Prepare request payload
    $payload = json_encode([
      "model" => $this->modelName,
      "messages" => [[
        "role" => "user",
        "content" => "Extract all text from the given image. Return result only, in the pattern label:value as it is, without translation.",
        "images" => [$imgStr]
      ]],
      "options" => ["temperature" => $this->temperature],
      "stream" => false
    ]);
    if ($payload === false) {
      return ["status" => "ERR", "message" => "Error: Unable to encode request payload."];
    }

    // Initialize cURL
    $ch = curl_init($this->ollamaUrl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, ["Content-Type: application/json"]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);

    // Execute request. The error text must be read BEFORE the handle is closed.
    $response = curl_exec($ch);
    if ($response === false || curl_errno($ch)) {
      $error = curl_error($ch);
      curl_close($ch);
      return ["status" => "ERR", "message" => $error];
    }
    curl_close($ch);

    // Remove <pre> (or any other) tags if present, then decode the JSON body.
    $response = strip_tags($response);

    $responseData = json_decode($response, true);
    if (!is_array($responseData)
      || !isset($responseData['message']['content'])
      || !is_string($responseData['message']['content'])
    ) {
      return ["status"=>"ERR", "message"=> "raw : $response"];
    }

    $parsedData = $this->parse_content($responseData["message"]["content"]);

    // Copy the timing/model fields; a field missing from the response becomes null.
    $info = [];
    foreach (["total_duration", "prompt_eval_duration", "eval_duration", "model"] as $field) {
      $info[$field] = isset($responseData[$field]) ? $responseData[$field] : null;
    }

    return ["status"=>"OK", "data" => $parsedData, "info"=> $info];
  }

  /**
   * Turn the model's text answer into a key => value array.
   *
   * Each line is split at the first ":". Keys have "*" removed, are trimmed,
   * lower-cased and have spaces replaced by "_". Values have "*" removed and
   * are trimmed. A key containing "/" (not as its first character) is treated
   * as several keys; its value is split on "/" when it contains one (not as
   * its first character), otherwise on ",". Lines without ":" are ignored.
   *
   * @param string $content Text returned by the model.
   * @return array Parsed key => value pairs.
   */
  private function parse_content($content)
  {
    $parsedData = [];
    foreach (explode("\n", $content) as $line) {
      $parts = explode(":", $line, 2);
      if (count($parts) !== 2) {
        continue;
      }

      $key = trim(str_replace("*", "", $parts[0]));
      $key = str_replace(" ", "_", strtolower($key));
      $value = trim(str_replace("*", "", $parts[1]));

      // "> 0" keeps the original rule: a leading "/" does not trigger a split.
      if (strpos($key, "/") > 0) {
        $subKeys = explode("/", $key);
        $subValues = strpos($value, "/") > 0
          ? explode("/", $value)
          : explode(",", $value);

        foreach ($subKeys as $idx => $subKey) {
          // Fewer values than keys: fall back to an empty string instead of
          // reading an undefined offset.
          $subValue = isset($subValues[$idx]) ? $subValues[$idx] : "";
          $parsedData[trim($subKey)] = trim($subValue);
        }
      } else {
        $parsedData[$key] = $value;
      }
    }
    return $parsedData;
  }
}
Model : llama3.2-vision library : application/libraries/Ocr_llama.php ```php <?php defined('BASEPATH') or exit('No direct script access allowed'); class Ocr_llama { private $ollamaUrl = "http://10.9.10.205:11434/api/chat"; private $modelName = "llama3.2-vision:11b"; private $temperature = 0.1; public function extract_ocr($imagePath) { if (!file_exists($imagePath)) { return ["status"=>"ERR", "message"=> "Error: File not found."]; } // Load image and convert to black and white $image = imagecreatefromstring(file_get_contents($imagePath)); imagefilter($image, IMG_FILTER_GRAYSCALE); // Get original dimensions and resize // $newWidth = intval(imagesx($image) / 2); // $newHeight = intval(imagesy($image) / ); // $resizedImage = imagecreatetruecolor($newWidth, $newHeight); // imagecopyresampled($resizedImage, $image, 0, 0, 0, 0, $newWidth, $newHeight, imagesx($image), imagesy($image)); // Convert image to base64 ob_start(); imagejpeg($image); $imageData = ob_get_clean(); $imgStr = base64_encode($imageData); // Prepare request payload $payload = json_encode([ "model" => $this->modelName, "messages" => [[ "role" => "user", "content" => "Extract all text from the given image. 
Return result only, in the pattern label:value as it is, without translation.", "images" => [$imgStr] ]], "options" => ["temperature" => $this->temperature], "stream" => false ]); // Initialize cURL $ch = curl_init($this->ollamaUrl); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_POST, true); curl_setopt($ch, CURLOPT_HTTPHEADER, ["Content-Type: application/json"]); curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); // Execute request $response = curl_exec($ch); if (curl_errno($ch)) { $error = curl_error($ch); curl_close($ch); return ["status" => "ERR", "message" => curl_error($ch)]; } curl_close($ch); // Decode response // Remove <pre> tags if present $response = strip_tags($response); $responseData = json_decode($response, true); if (!$responseData || !isset($responseData['message']['content'])) { return ["status"=>"ERR", "message"=> "raw : $response"]; } // Parse content into JSON object $content = $responseData["message"]["content"]; // echo "content:\n"; // echo $content . "\n"; // echo "\n"; $parsedData = []; $lines = explode("\n", $content); foreach ($lines as $line) { $parts = explode(":", $line, 2); if (count($parts) == 2) { $key = trim(str_replace("*","",$parts[0])); $key = strtolower($key); $key = str_replace(" ","_",$key); $value = trim(str_replace("*","",$parts[1])); if (strpos($key, "/")) { $a_key = explode("/", $key); $a_val = explode(",", $value); if (strpos($value, "/")) { $a_val = explode("/", $value); } foreach($a_key as $a_idx => $sub_key) { $sub_val = $a_val[$a_idx]; $parsedData[trim($sub_key)] = trim($sub_val); } } else { $parsedData[$key] = $value; } } } $info["total_duration"] = $responseData["total_duration"]; $info["prompt_eval_duration"] = $responseData["prompt_eval_duration"]; $info["eval_duration"] = $responseData["eval_duration"]; $info["model"] = $responseData["model"]; return ["status"=>"OK", "data" => $parsedData, "info"=> $info]; } } ```
Author

Sample usage :
application/controllers/test/T_ocr.php

<?php

/**
 * Sample controller showing how to call the Ocr_llama library.
 */
class T_ocr extends MY_Controller
{
  /** Directory (with trailing slash) that holds the scanned images. */
  public $base_img = "/home/one/project/one/one-media/scan-ktp/";

  public function __construct()
  {
    parent::__construct();
  }

  /**
   * Runs OCR on one hard-coded image and echoes the result array as
   * pretty-printed JSON.
   */
  public function index()
  {
    $image_path = $this->base_img . "hard-coded-scan-image.jpg";
    $this->load->library('Ocr_llama');
    $result = $this->ocr_llama->extract_ocr($image_path);
    echo json_encode($result, JSON_PRETTY_PRINT);
  }
}

The result is returned as JSON.

image

Sample usage : application/controllers/test/T_ocr.php ```php <?php class T_ocr extends MY_Controller { var $base_img = "/home/one/project/one/one-media/scan-ktp/"; public function __construct() { parent::__construct(); } public function index() { $image_path = $this->base_img . "hard-coded-scan-image.jpg"; $this->load->library('Ocr_llama'); $result = $this->ocr_llama->extract_ocr($image_path); echo json_encode($result,JSON_PRETTY_PRINT); } } ``` the result is in json ![image](/attachments/885f0b13-ccfa-47a0-b84e-2198f5972cba)
Author

These timing metrics come from the API response and represent the duration of each processing stage in nanoseconds (ns). Here's what they mean:

  1. total_duration: 25,349,696,375 ns (≈ 25.35 seconds)

    • This is the total time taken for the entire operation from start to finish.
  2. prompt_eval_duration: 152,000,000 ns (≈ 0.152 seconds)

    • This represents the time taken to evaluate (process) the input prompt before generating any output.
  3. eval_duration: 24,616,000,000 ns (≈ 24.62 seconds)

    • This is the time spent evaluating (generating) the response after processing the prompt.

Interpretation:

  • The majority of the time (≈ 24.62s) was spent generating the response (eval_duration), while only a small fraction (≈ 0.152s) was spent processing the input (prompt_eval_duration).
  • The remaining ≈ 0.58s (total_duration minus eval_duration and prompt_eval_duration) is not covered by either metric; it is possibly model loading (which Ollama reports separately as load_duration), preprocessing, communication, or system latency.
These timing metrics from an API response, representing the duration of different processing stages in nanoseconds (ns). Here's what they mean: 1. **`total_duration`: 25,349,696,375 ns (≈ 25.35 seconds)** - This is the total time taken for the entire operation from start to finish. 2. **`prompt_eval_duration`: 152,000,000 ns (≈ 0.152 seconds)** - This represents the time taken to evaluate (process) the input prompt before generating any output. 3. **`eval_duration`: 24,616,000,000 ns (≈ 24.62 seconds)** - This is the time spent evaluating (generating) the response after processing the prompt. ### Interpretation: - The majority of the time (≈ 24.62s) was spent generating the response (`eval_duration`), while only a small fraction (≈ 0.152s) was spent processing the input (`prompt_eval_duration`). - There may be some additional overhead (≈ 0.58s) unaccounted for directly in `eval_duration` and `prompt_eval_duration`, possibly for preprocessing, communication, or system latency.
Sign in to join this conversation.
No Label
1 Participants
Notifications
Due Date
No due date set.
Dependencies

No dependencies set.

Reference: sindhu/scan-ktp#1