using System;
using System.Collections.Generic;
using System.IO; // For file operations
using System.Linq; // For LINQ extensions like OrderBy, GroupBy
using System.Text; // For StringBuilder
using DETA7.Commons.Utils; // Assuming this provides Vector, LineSegment, Matrix
using DETA7.Kernel.Colors; // Assuming this provides Color, DeviceRgb etc.
using DETA7.Kernel.Font; // Assuming this provides PdfFont, FontProgram
using DETA7.Kernel.Geom; // Assuming this provides Vector, LineSegment, Matrix
using DETA7.Kernel.Pdf; // Assuming this provides PdfString, PdfDictionary
using DETA7.Kernel.Pdf.Canvas; // Assuming this provides CanvasGraphicsState
using DETA7.Kernel.Pdf.Canvas.Parser; // Assuming this provides PdfCanvasProcessor, IEventListener, EventType
using DETA7.Kernel.Pdf.Canvas.Parser.Data; // Assuming this provides TextRenderInfo, PathRenderInfo, AbstractRenderInfo
using DETA7.Kernel.Pdf.Canvas.Parser.Listener; // Assuming this namespace exists, e.g., for IEventListener implementation
namespace saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
{
// --- 1. Enriched TextChunk Data Structure ---
// Represents an individual text rendering operation parsed from PDF
/// <summary>
/// One text rendering operation captured from a PDF content stream, together with the
/// geometry and style values the grouping logic needs to decide whether neighbouring
/// chunks belong to the same piece of text.
/// </summary>
public class TextChunk
{
    /// <summary>Baselines shorter than this are treated as having no direction.</summary>
    private const float MinBaselineLength = 1e-6f;

    public int PageNumber { get; set; }
    public string Text { get; set; }
    public Vector StartPoint { get; set; } // Baseline start point in user space (CTM-transformed)
    public Vector EndPoint { get; set; } // Baseline end point in user space (CTM-transformed)
    public float AngleDegrees { get; set; } // Rotation angle of baseline in degrees (0-360)
    public float FontHeight { get; set; } // Ascent converted to user space; same value as AscentValue
    public string FontName { get; set; }
    public Color FillColor { get; set; }
    public int TextRenderMode { get; set; } // PDF Text Rendering Mode (0=fill, 1=stroke, etc.)
    public float WordSpacing { get; set; } // Tw operator value
    public float CharSpacing { get; set; } // Tc operator value
    public float HorizontalScaling { get; set; } // Tz operator value
    public Vector BaselineUnitVector { get; set; } // Unit vector along the baseline
    public float RenderedWidth { get; set; } // Unscaled width converted to user space
    public float AscentValue { get; set; } // Ascent from baseline (user space)
    public float DescentValue { get; set; } // Descent from baseline (user space)

    /// <summary>
    /// Copies everything of interest out of <paramref name="renderInfo"/> so the chunk
    /// does not need the render info after construction.
    /// </summary>
    /// <param name="renderInfo">Text render event data delivered by the canvas processor.</param>
    /// <param name="pageNumber">Number of the page the text was found on.</param>
    /// <exception cref="ArgumentNullException"><paramref name="renderInfo"/> is null.</exception>
    public TextChunk(TextRenderInfo renderInfo, int pageNumber)
    {
        if (renderInfo == null)
        {
            throw new ArgumentNullException(nameof(renderInfo));
        }

        PageNumber = pageNumber;
        Text = renderInfo.GetText() ?? string.Empty;

        LineSegment baseline = renderInfo.GetBaseline();
        StartPoint = baseline.GetStartPoint();
        EndPoint = baseline.GetEndPoint();

        // Baseline direction as an angle, normalized into [0, 360).
        float dx = EndPoint.Get(Vector.I1) - StartPoint.Get(Vector.I1);
        float dy = EndPoint.Get(Vector.I2) - StartPoint.Get(Vector.I2);
        AngleDegrees = (float)(Math.Atan2(dy, dx) * (180.0 / Math.PI));
        AngleDegrees = (AngleDegrees + 360) % 360;

        // Font metrics: fetched once, then converted to user space.
        float[] ascentDescent = renderInfo.GetAscentDescent();
        AscentValue = renderInfo.ConvertHeightFromTextSpaceToUserSpace(ascentDescent[0]);
        DescentValue = renderInfo.ConvertHeightFromTextSpaceToUserSpace(ascentDescent[1]);
        FontHeight = AscentValue;

        // A missing font / font program / name table yields an empty name instead of a crash.
        FontName = renderInfo.GetFont()?.GetFontProgram()?.GetFontNames()?.GetFontName() ?? string.Empty;
        FillColor = renderInfo.GetFillColor();
        TextRenderMode = renderInfo.GetTextRenderMode();
        WordSpacing = renderInfo.GetWordSpacing();
        CharSpacing = renderInfo.GetCharSpacing();
        HorizontalScaling = renderInfo.GetHorizontalScaling();
        RenderedWidth = renderInfo.ConvertWidthFromTextSpaceToUserSpace(renderInfo.GetUnscaledWidth());

        // Normalizing a zero-length vector divides by zero and produces NaN components,
        // which would poison every later distance computation. Fall back to the X axis,
        // which matches the 0 degree angle Atan2(0, 0) yields above.
        Vector baselineVec = EndPoint.Subtract(StartPoint);
        BaselineUnitVector = baselineVec.Length() > MinBaselineLength
            ? baselineVec.Normalize()
            : new Vector(1f, 0f, 0f);
    }

    /// <summary>
    /// Tells whether this chunk and <paramref name="other"/> share font height, baseline
    /// angle, font name (case-insensitive), fill colour and text render mode.
    /// </summary>
    /// <param name="other">Chunk to compare with; null is never similar.</param>
    /// <param name="fontHeightTolerance">Maximum (exclusive) font height difference.</param>
    /// <param name="angleTolerance">Maximum (exclusive) angle difference in degrees.</param>
    /// <returns>True when all five properties match within the tolerances.</returns>
    public bool IsSimilarStyle(TextChunk other, float fontHeightTolerance = 0.5f, float angleTolerance = 1.0f)
    {
        if (other == null)
        {
            return false;
        }

        // Angles wrap around: 359.8 and 0.2 degrees are 0.4 degrees apart, not 359.6.
        float angleDelta = Math.Abs(this.AngleDegrees - other.AngleDegrees) % 360f;
        if (angleDelta > 180f)
        {
            angleDelta = 360f - angleDelta;
        }

        // The static Equals overloads tolerate null font names and null colours.
        return Math.Abs(this.FontHeight - other.FontHeight) < fontHeightTolerance
            && angleDelta < angleTolerance
            && string.Equals(this.FontName, other.FontName, StringComparison.OrdinalIgnoreCase)
            && object.Equals(this.FillColor, other.FillColor)
            && this.TextRenderMode == other.TextRenderMode;
    }

    /// <summary>
    /// Estimates the width of a space for this chunk as 30% of the font height plus the
    /// character and word spacing. This is a heuristic, not a lookup of the real space glyph.
    /// </summary>
    /// <returns>The estimated space width.</returns>
    public float GetExpectedSpaceWidth()
    {
        return (0.3f * this.FontHeight) + this.CharSpacing + this.WordSpacing;
    }
}
// --- 2. TextGroup Data Structure for Output ---
// Represents a consolidated text block ready for database insertion
/// <summary>
/// A run of text chunks merged into one string, carrying the position and style of the
/// first chunk, and able to serialize itself as one tab-separated row.
/// </summary>
public class TextGroup
{
    /// <summary>Colour reported when the source colour is missing or has an unsupported shape.</summary>
    private const string DefaultColorHex = "#FFFFFF";

    /// <summary>Gaps at or below this value never produce a separating space.</summary>
    private const float MinGapForSpace = 0.01f;

    /// <summary>All numbers are written culture-independently so the TSV reads the same everywhere.</summary>
    private static readonly IFormatProvider Invariant = System.Globalization.CultureInfo.InvariantCulture;

    public int PageNumber { get; set; }
    public StringBuilder MergedText { get; set; } = new StringBuilder(); // The combined string
    public float InsertionX { get; set; } // X-coordinate of the group's baseline start point
    public float InsertionY { get; set; } // Y-coordinate of the group's baseline start point
    public float AngleDegrees { get; set; } // Angle of the group's baseline
    public float FontHeight { get; set; } // Font height of the first chunk
    public string FontName { get; set; }
    public string FontColorHex { get; set; } // RGB hex string
    public string TextRenderModeDescription { get; set; } // Description of the rendering mode (e.g., "Fill", "Stroke")
    public string BaselineVectorDirection { get; set; } // Unit vector components as string "dx, dy"
    public float GroupWidth { get; set; } // Sum of the rendered widths of all chunks in the group
    public float GroupHeight { get; set; } // Largest (ascent + |descent|) over all chunks

    // Original chunks in the order they were added (debugging / traceability).
    private readonly List<TextChunk> _originalChunksInGroup = new List<TextChunk>();

    /// <summary>Starts a group from its first chunk; position and style are taken from it.</summary>
    /// <param name="initialChunk">First chunk of the group.</param>
    /// <exception cref="ArgumentNullException"><paramref name="initialChunk"/> is null.</exception>
    public TextGroup(TextChunk initialChunk)
    {
        if (initialChunk == null)
        {
            throw new ArgumentNullException(nameof(initialChunk));
        }

        PageNumber = initialChunk.PageNumber;
        MergedText.Append(initialChunk.Text);
        InsertionX = initialChunk.StartPoint.Get(Vector.I1);
        InsertionY = initialChunk.StartPoint.Get(Vector.I2);
        AngleDegrees = initialChunk.AngleDegrees;
        FontHeight = initialChunk.FontHeight;
        FontName = initialChunk.FontName;
        FontColorHex = GetHexColor(initialChunk.FillColor);
        TextRenderModeDescription = GetRenderModeDescription(initialChunk.TextRenderMode);
        BaselineVectorDirection = string.Format(
            Invariant,
            "{0:F3}, {1:F3}",
            initialChunk.BaselineUnitVector.Get(Vector.I1),
            initialChunk.BaselineUnitVector.Get(Vector.I2));
        GroupWidth = initialChunk.RenderedWidth;
        GroupHeight = initialChunk.AscentValue + Math.Abs(initialChunk.DescentValue);
        _originalChunksInGroup.Add(initialChunk);
    }

    /// <summary>
    /// Appends a chunk to the merged text. A single space is inserted first when
    /// <paramref name="actualGap"/> exceeds 0.01 and neither side of the join already
    /// supplies whitespace.
    /// </summary>
    /// <param name="chunk">Chunk to append.</param>
    /// <param name="actualGap">Measured gap between the previous chunk and this one.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chunk"/> is null.</exception>
    public void AppendChunk(TextChunk chunk, float actualGap)
    {
        if (chunk == null)
        {
            throw new ArgumentNullException(nameof(chunk));
        }

        string text = chunk.Text ?? string.Empty;

        // Avoid "word  word": the PDF often carries the space inside one of the chunks.
        bool endsWithWhitespace = MergedText.Length > 0 && char.IsWhiteSpace(MergedText[MergedText.Length - 1]);
        bool startsWithWhitespace = text.Length > 0 && char.IsWhiteSpace(text[0]);
        if (actualGap > MinGapForSpace && !endsWithWhitespace && !startsWithWhitespace)
        {
            MergedText.Append(" ");
        }

        MergedText.Append(text);
        GroupWidth += chunk.RenderedWidth;
        GroupHeight = Math.Max(GroupHeight, chunk.AscentValue + Math.Abs(chunk.DescentValue));
        _originalChunksInGroup.Add(chunk);
    }

    /// <summary>
    /// Converts a colour to "#RRGGBB" based on its component count: one component is
    /// read as gray, four as CMYK, three (or five and more, using the first three) as RGB.
    /// Null colours and other shapes yield "#FFFFFF".
    /// </summary>
    private string GetHexColor(Color color)
    {
        float[] components = color?.GetColorValue();
        if (components == null)
        {
            return DefaultColorHex;
        }

        float red;
        float green;
        float blue;
        if (components.Length == 1)
        {
            // Gray: one intensity shared by all three channels.
            red = green = blue = components[0];
        }
        else if (components.Length == 4)
        {
            // Simple (non colour-managed) CMYK to RGB conversion.
            float keep = 1f - components[3];
            red = (1f - components[0]) * keep;
            green = (1f - components[1]) * keep;
            blue = (1f - components[2]) * keep;
        }
        else if (components.Length >= 3)
        {
            red = components[0];
            green = components[1];
            blue = components[2];
        }
        else
        {
            return DefaultColorHex;
        }

        return "#" + ToHexByte(red) + ToHexByte(green) + ToHexByte(blue);
    }

    /// <summary>Maps a 0..1 component to two upper-case hex digits; out-of-range values are clamped, NaN becomes 00.</summary>
    private static string ToHexByte(float component)
    {
        if (float.IsNaN(component))
        {
            return "00";
        }

        float clamped = Math.Max(0f, Math.Min(1f, component));
        return ((int)(clamped * 255)).ToString("X2", Invariant);
    }

    /// <summary>Returns a readable name for a PDF text render mode (0..7), "Unknown" otherwise.</summary>
    private string GetRenderModeDescription(int mode)
    {
        switch (mode)
        {
            case 0: return "Fill";
            case 1: return "Stroke";
            case 2: return "Fill+Stroke";
            case 3: return "Invisible";
            case 4: return "Fill+Clip";
            case 5: return "Stroke+Clip";
            case 6: return "Fill+Stroke+Clip";
            case 7: return "Clip";
            default: return "Unknown";
        }
    }

    /// <summary>
    /// Serializes the group as one row matching <see cref="GetTsvHeader"/>. Numbers use the
    /// invariant culture. Tabs and line breaks inside the text become spaces and embedded
    /// double quotes are doubled, so the quoted text field always stays on one row.
    /// </summary>
    /// <param name="groupCounter">Running number written into the second column.</param>
    /// <returns>The tab-separated row without a trailing line break.</returns>
    public string ToTabSeparatedString(int groupCounter)
    {
        string escapedText = MergedText.ToString()
            .Replace("\t", " ")
            .Replace("\r\n", " ")
            .Replace("\r", " ")
            .Replace("\n", " ")
            .Replace("\"", "\"\"");

        string[] fields =
        {
            PageNumber.ToString(Invariant),
            groupCounter.ToString(Invariant),
            AngleDegrees.ToString("F2", Invariant),
            FontColorHex,
            FontName,
            FontHeight.ToString("F2", Invariant),
            TextRenderModeDescription,
            BaselineVectorDirection,
            InsertionX.ToString("F2", Invariant),
            InsertionY.ToString("F2", Invariant),
            GroupWidth.ToString("F2", Invariant),
            GroupHeight.ToString("F2", Invariant),
            "\"" + escapedText + "\""
        };

        // string.Join writes null entries as empty fields.
        return string.Join("\t", fields);
    }

    /// <summary>Returns the header row for the columns written by <see cref="ToTabSeparatedString"/>.</summary>
    public static string GetTsvHeader()
    {
        return "PageNumber\tTextGroupCounter\tSlopeInDegrees\tColorHex\tFontName\tFontHeight\tTextStyle\tBaselineVector\tInsertionX\tInsertionY\tGroupWidth\tGroupHeight\tTextContent";
    }

    /// <summary>Returns the first chunk added to the group (the style reference), or null if there is none.</summary>
    internal TextChunk GetInitialChunk()
    {
        return _originalChunksInGroup.FirstOrDefault();
    }
}
// --- 3. Revised Text Grouping Strategy ---
// This class is the core logic for collecting and grouping text chunks.
/// <summary>
/// Event listener that buffers the text chunks of the current page and, when the page
/// changes or grouping is finalized, merges neighbouring chunks of the same style and
/// baseline into <see cref="TextGroup"/> instances.
/// </summary>
public class DxfTextGroupingStrategy : IEventListener
{
    // Grouping thresholds.
    private const float AngleToleranceDegrees = 1.0f;               // allowed baseline angle difference
    private const float FontHeightToleranceFactor = 0.1f;           // allowed height difference, fraction of the anchor's font height
    private const float PerpendicularDistanceThresholdFactor = 0.6f; // off-baseline distance, fraction of font height
    private const float HorizontalGapThresholdFactor = 1.5f;        // largest gap still joined (with a space), in expected space widths
    private const float MergeGapThresholdFactor = 0.3f;             // largest gap joined without a space, in expected space widths
    private const float OverlapToleranceFactor = 1.0f;              // largest backwards overlap still joined, in expected space widths

    private readonly List<TextChunk> _allTextChunksOnCurrentPageBuffer = new List<TextChunk>(); // Chunks of the page being read
    public readonly List<TextGroup> GroupedTextOutput = new List<TextGroup>(); // Groups of all pages processed so far
    private int _currentPageNumber; // 0 until SetPageInfo has been called
    private float _currentPageWidth;
    private float _currentPageHeight;

    /// <summary>
    /// Announces the page about to be parsed. Chunks buffered for the previous page are
    /// grouped first, then the buffer is cleared.
    /// </summary>
    public void SetPageInfo(int pageNumber, float pageWidth, float pageHeight)
    {
        ProcessBufferedTextChunksForPage(_currentPageNumber);
        _allTextChunksOnCurrentPageBuffer.Clear();
        _currentPageNumber = pageNumber;
        _currentPageWidth = pageWidth;
        _currentPageHeight = pageHeight;
    }

    /// <summary>Buffers every RENDER_TEXT event as a <see cref="TextChunk"/> of the current page.</summary>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT)
        {
            return;
        }

        var renderInfo = (TextRenderInfo)data;
        // TextChunk takes the page number; the page size is not part of a chunk.
        _allTextChunksOnCurrentPageBuffer.Add(new TextChunk(renderInfo, _currentPageNumber));
    }

    /// <summary>Only text render events are requested from the processor.</summary>
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> { EventType.RENDER_TEXT };
    }

    /// <summary>
    /// Groups the buffered chunks and appends the resulting groups to
    /// <see cref="GroupedTextOutput"/>. Does nothing when the buffer is empty or
    /// <paramref name="pageNumber"/> is 0 (no page announced yet). The buffer itself is
    /// not cleared here; callers do that.
    /// </summary>
    /// <param name="pageNumber">The number of the page whose chunks are being processed.</param>
    public void ProcessBufferedTextChunksForPage(int pageNumber)
    {
        if (pageNumber == 0 || _allTextChunksOnCurrentPageBuffer.Count == 0)
        {
            return;
        }

        Console.WriteLine($"\n Processing text chunks for page {pageNumber}...");

        // Order: rounded angle (360 folded onto 0), then top to bottom, then left to right.
        List<TextChunk> sortedChunks = _allTextChunksOnCurrentPageBuffer
            .OrderBy(c => Math.Round(c.AngleDegrees, 0) % 360)
            .ThenByDescending(c => c.StartPoint.Get(Vector.I2))
            .ThenBy(c => c.StartPoint.Get(Vector.I1))
            .ToList();

        int groupsBeforePage = GroupedTextOutput.Count;
        TextGroup currentGroup = null;
        TextChunk anchorChunk = null;   // first chunk of the current group: style reference
        TextChunk previousChunk = null; // last chunk added to the current group: geometry reference

        foreach (TextChunk currentChunk in sortedChunks)
        {
            bool addSpace = false;
            float gap = 0f;
            bool joins = currentGroup != null
                && TryMeasureJoin(anchorChunk, previousChunk, currentChunk, out gap, out addSpace);

            if (joins)
            {
                // A zero gap tells the group to concatenate without a separating space.
                currentGroup.AppendChunk(currentChunk, addSpace ? gap : 0f);
            }
            else
            {
                if (currentGroup != null)
                {
                    GroupedTextOutput.Add(currentGroup);
                }

                currentGroup = new TextGroup(currentChunk);
                anchorChunk = currentChunk;
            }

            previousChunk = currentChunk;
        }

        if (currentGroup != null)
        {
            GroupedTextOutput.Add(currentGroup);
        }

        int groupsForPage = GroupedTextOutput.Count - groupsBeforePage;
        Console.WriteLine($" Grouped {sortedChunks.Count} chunks into {groupsForPage} groups for page {pageNumber}.");
    }

    /// <summary>
    /// Decides whether <paramref name="current"/> continues the group that started with
    /// <paramref name="anchor"/> and currently ends with <paramref name="previous"/>.
    /// </summary>
    /// <param name="anchor">First chunk of the group; its style must match.</param>
    /// <param name="previous">Last chunk of the group; distances are measured from it.</param>
    /// <param name="current">Candidate chunk.</param>
    /// <param name="gap">Signed distance along the previous baseline from its end to the candidate's start.</param>
    /// <param name="addSpace">True when the gap is wide enough to stand for a word space.</param>
    /// <returns>True when the candidate should be appended to the group.</returns>
    private static bool TryMeasureJoin(TextChunk anchor, TextChunk previous, TextChunk current, out float gap, out bool addSpace)
    {
        gap = 0f;
        addSpace = false;

        // 1. Same font, colour, render mode, height (within 10%) and angle as the group's first chunk.
        if (!current.IsSimilarStyle(
                anchor,
                fontHeightTolerance: anchor.FontHeight * FontHeightToleranceFactor,
                angleTolerance: AngleToleranceDegrees))
        {
            return false;
        }

        // 2. Same line: distance of the candidate's start from the previous baseline,
        //    which also works for slanted text.
        Vector fromPreviousStart = current.StartPoint.Subtract(previous.StartPoint);
        float perpendicularDistance = fromPreviousStart.Cross(previous.BaselineUnitVector).Length();
        if (perpendicularDistance >= previous.FontHeight * PerpendicularDistanceThresholdFactor)
        {
            return false;
        }

        // 3. Gap along the baseline, measured from where the previous chunk's baseline ends.
        gap = current.StartPoint.Subtract(previous.EndPoint).Dot(previous.BaselineUnitVector);
        float expectedSpaceWidth = previous.GetExpectedSpaceWidth();

        // A start far behind the previous end is a different piece of text that merely
        // shares the line (for example an out-of-order column), not a continuation.
        if (gap < -(expectedSpaceWidth * OverlapToleranceFactor))
        {
            return false;
        }

        bool mergeDirectly = gap < expectedSpaceWidth * MergeGapThresholdFactor;
        addSpace = !mergeDirectly && gap < expectedSpaceWidth * HorizontalGapThresholdFactor;
        return mergeDirectly || addSpace;
    }

    /// <summary>
    /// Groups the chunks still buffered for the last page and clears the buffer. Call once
    /// after all pages have been processed.
    /// </summary>
    public void FinalizeAllTextGrouping()
    {
        ProcessBufferedTextChunksForPage(_currentPageNumber);
        _allTextChunksOnCurrentPageBuffer.Clear();
    }
}
// --- 4. Orchestrating Class (modified to use new strategy and TSV export) ---
// This class coordinates the PDF reading and listener
public class PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations
{
/// <summary>
/// Runs the extraction listener over every page of the given PDF and writes three files
/// next to it: the grouped texts (".tsv"), the DXF drawing (".dxf") and the raw
/// graphics/text log (".txt"). Any exception is reported on the console and not rethrown;
/// the document and the reader are closed in every case.
/// </summary>
/// <param name="selected_pdffilesnames">Path of the PDF file to process.</param>
public static void ExtractAndGenerateDxf(string selected_pdffilesnames)
{
    string pdfPath = selected_pdffilesnames;
    string rawLogPath = pdfPath + "___dumping_pagewises_graphics_data.txt";
    string dxfPath = pdfPath + "___pagewises_graphics_data.dxf";
    string tsvPath = pdfPath + "___grouped_texts.tsv";

    PdfReader pdfReader = null;
    PdfDocument document = null;
    try
    {
        pdfReader = new PdfReader(pdfPath);
        document = new PdfDocument(pdfReader);
        var listener = new GraphicsAndText_ExtractionListener();
        var processor = new PdfCanvasProcessor(listener);

        // Parse page by page; the listener is told the page number and size first.
        for (int pageIndex = 1; pageIndex <= document.GetNumberOfPages(); pageIndex++)
        {
            PdfPage currentPage = document.GetPage(pageIndex);
            float width = currentPage.GetPageSize().GetWidth();
            float height = currentPage.GetPageSize().GetHeight();
            listener.SetPageInfo(pageIndex, width, height);
            processor.ProcessPageContent(currentPage);
        }

        // Flush the chunks of the final page, then collect the groups of all pages.
        listener._textGroupingStrategy.FinalizeAllTextGrouping();
        List<TextGroup> groups = listener._textGroupingStrategy.GroupedTextOutput;

        // Grouped texts: header row plus one row per group, numbered from 1.
        using (var tsvWriter = new StreamWriter(tsvPath, false, Encoding.UTF8))
        {
            tsvWriter.WriteLine(TextGroup.GetTsvHeader());
            int groupNumber = 0;
            foreach (TextGroup group in groups)
            {
                groupNumber++;
                tsvWriter.WriteLine(group.ToTabSeparatedString(groupNumber));
            }
        }
        Console.WriteLine($"Grouped texts extracted to TSV: {tsvPath}");

        // DXF: minimal section skeleton wrapped around the entities the listener collected.
        List<string> entities = listener.GetFinalDxfData();
        var dxfLines = new List<string>
        {
            "0\nSECTION\n2\nHEADER\n0\nENDSEC",
            "0\nSECTION\n2\nTABLES\n0\nENDSEC",
            "0\nSECTION\n2\nBLOCKS\n0\nENDSEC",
            "0\nSECTION\n2\nENTITIES"
        };
        dxfLines.AddRange(entities);
        dxfLines.Add("0\nENDSEC");
        dxfLines.Add("0\nSECTION\n2\nOBJECTS\n0\nENDSEC");
        dxfLines.Add("0\nEOF");
        File.WriteAllLines(dxfPath, dxfLines);
        Console.WriteLine($"DXF generation completed successfully to: {dxfPath}");

        // Raw graphics/text log.
        listener.WriteGraphicsData(rawLogPath);
        Console.WriteLine("Process completed.");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"An error occurred during PDF extraction: {ex.Message}");
        Console.WriteLine(ex.StackTrace);
    }
    finally
    {
        document?.Close();
        pdfReader?.Close();
    }
}
} }
Here's a detailed explanation of what the namespace and the class are doing:
________________________________________
🧭 Namespace Overview
saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
This namespace is part of a PDF graphics and text extraction system, likely used for engineering or CAD-like applications. It handles:
• Page-wise extraction of graphical and textual content from PDF files.
• Transformation matrix handling for positioning and scaling.
• DXF generation for CAD interoperability.
• Layered classification of graphical entities (lines, curves, text) for semantic filtering.
It appears to be designed for high-fidelity extraction of vector graphics and text from PDFs, especially those generated by tools like Revit, Tekla, or other engineering software.
________________________________________
🧩 Class Overview
public class GraphicsAndText_ExtractionListener : IEventListener
This class is the core listener that responds to PDF canvas events during parsing. It implements the IEventListener interface and is responsible for:
🔹 Responsibilities
1. Listening to PDF Render Events:
• RENDER_PATH: Extracts vector graphics (lines, curves).
• RENDER_TEXT: Extracts text content and metadata.
• BEGIN_TEXT, END_TEXT: Marks text blocks (optional).
• (Commented support for SAVE_GRAPHICS_STATE, RESTORE_GRAPHICS_STATE, MODIFY_CTM)
2. Data Collection:
• graphicsData: Stores raw graphical path details.
• ListOfStringAsTextDataOnlys: Stores extracted text details.
• dxfData: Stores DXF-formatted output strings.
3. Transformation Matrix Handling:
• Applies transformation matrices to convert PDF coordinates to DXF coordinates.
• Handles scaling, rotation, and offsetting per page.
4. DXF Generation:
• Converts lines and Bezier curves into DXF LINE entities.
• Converts text into DXF TEXT entities with font, color, rotation, and layer metadata.
• Applies page-wise offsets (e.g., 30000 * (pageNumber - 1)) for spatial separation.
5. Layer Classification:
• Dynamically generates DXF layer names based on:
• Font size
• Rotation angle
• Word spacing
• Font name
• Fill color
• Helps in semantic filtering of DXF content.
6. Color Mapping:
• Converts RGB to AutoCAD ACI color codes.
• Adds trueColor and ACI color to DXF entities.
7. Geometry Classification:
• Detects if paths are:
• Closed (polygons)
• Open (lines)
• Beziered (curves)
• Linear (straight segments)
• Uses this to tag DXF layers meaningfully.
8. Output Writing:
• WriteGraphicsData(): Dumps raw graphics and text to .txt.
• WriteDxfData(): Writes full DXF content with headers and footers.
________________________________________
🧠 Why It’s Powerful
This class is designed for engineering-grade PDF parsing, enabling:
• Automated conversion of PDF drawings to DXF.
• Semantic tagging of geometry and text.
• Layered classification for CAD filtering.
• Support for multi-page documents with spatial separation.
The namespace saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises and its class GraphicsAndText_ExtractionListener : IEventListener are designed to extract graphical paths, text objects, and page-level transformations from PDF documents, likely using the iText (iText7) PDF processing library.
________________________________________
🔍 What the Namespace Does:
The namespace focuses on:
• Analyzing each PDF page's content stream.
• Tracking and interpreting graphics state changes, text content, and path operations.
• Handling matrix transformations, positioning, scaling, and clipping for graphics and text.
This likely aims to extract a vector graphics + text layout reconstruction useful for:
• DXF or SVG generation.
• High-fidelity rendering on custom canvases.
• Alignment and annotation tasks in engineering or document automation.
________________________________________
🧠 Class: GraphicsAndText_ExtractionListener Implements IEventListener
This class listens to iText PDF rendering instructions and performs structured data collection from the content stream.
🔧 Major Responsibilities:
1. Text Extraction:
o Listens for text rendering operations (EventType.RENDER_TEXT).
o Extracts:
String content
Text matrix (positioning, scale, rotation)
Font size, width, height
Bounding boxes (to reconstruct layout)
2. Path Extraction:
o Captures vector path definitions like lines, rectangles, bezier curves using:
EventType.RENDER_PATH
Operates on path PdfCanvasPath objects
o Stores:
Path commands (moveTo, lineTo, curveTo)
Stroke/fill properties
Clipping flags
3. Graphics State Stack:
o Tracks changes like:
gsave, grestore
CTM (Current Transformation Matrix)
Stroke width, color, fill color
Line cap, join, dash patterns
4. Page Transform Context:
o Extracts page-level transformation matrices to map all elements from local (object) space to global (page) space.
5. Data Structuring:
o Stores parsed data in reusable C# objects like:
ExtractedPath
ExtractedTextObject
GraphicsStateSnapshot
o Probably maintains per-page collections for batching or export.
________________________________________
📘 What the Class Likely Contains (based on standard patterns):
(C# sketch)
// Illustrative sketch of the listener's shape; the handlers are placeholders.
public class GraphicsAndText_ExtractionListener : IEventListener
{
    // Callback invoked for each event type listed in GetSupportedEvents().
    public void EventOccurred(IEventData data, EventType type)
    {
        switch (type)
        {
            case EventType.RENDER_TEXT:
                // Extract text string, matrix, font info
                break;
            case EventType.RENDER_PATH:
                // Capture path operations, graphic state
                break;
            case EventType.CLIP:
                // Handle clipping paths
                // NOTE(review): confirm the enum member name; stock iText 7 calls it CLIP_PATH_CHANGED.
                break;
            // Two labels sharing one body; "BEGIN_TEXT / END_TEXT" is not a valid case label.
            case EventType.BEGIN_TEXT:
            case EventType.END_TEXT:
                // Track grouping of text sections
                break;
        }
    }

    // Event types this listener wants to receive.
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> {
            EventType.RENDER_TEXT,
            EventType.RENDER_PATH,
            EventType.CLIP,
            EventType.BEGIN_TEXT,
            EventType.END_TEXT
        };
    }
}
________________________________________
✅ Key Use Cases Enabled by This System:
• Extract engineering drawings embedded as vector paths.
• Extract text positioning for semantic mapping (OCR post-processing, heading alignment).
• Feed extracted graphics into:
o CNC path generation
o DXF/DWG vector exports
o Comparison tools for as-built vs. design drawings
• Integrate layout analysis for annotation/machine learning systems.
________________________________________
Would you like me to:
• Summarize the internal object models used (ExtractedTextObject, ExtractedPath, etc.)?
• Show how to extend this to extract rotation/scale metadata or generate DXF output?
• Integrate this listener into a multi-page PDF parser?
Let me know your enhancement direction.
The user has provided a C# code snippet from a PDF processing library (likely iTextSharp or a similar derivative, given the namespace structure and class names like PdfCanvasProcessor, IEventListener, Matrix, PdfObject, etc.).
Let's break down the saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises namespace and the GraphicsAndText_ExtractionListener class within it.
________________________________________
Namespace: saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
This is a very descriptive (and quite long!) namespace name, indicating its purpose. It suggests an attempt by "saan" to develop functionality related to:
• _3000_OFFSETS: Implies handling page-wise offsets, likely for laying out content from different PDF pages into a single coordinate system (e.g., in a DXF export). The value 30000 is explicitly used as an offset factor in the code (30000 * (pageNumber - 1)). This is a common technique when converting multi-page documents to single-sheet CAD formats where each page's content is simply translated on a large "paper space".
• PAGES_TRANSFORMATIONS_MATRIX: Indicates that the code is dealing with the Current Transformation Matrix (CTM) of PDF graphics. PDF uses the CTM to transform coordinates from user space to device space, and this matrix is crucial for correctly positioning and scaling elements. The constants CTM_SAAN__a, CTM_SAAN__b, etc., suggest direct manipulation or logging of CTM components.
• GRAPHIKS_STATES: Refers to the graphics state stack in PDF. PDF content streams use operators like q (save graphics state) and Q (restore graphics state) to manage properties like current transformation matrix, line width, color, font, etc. The code refers to the SAVE_GRAPHICS_STATE and RESTORE_GRAPHICS_STATE event types: they are commented out in the initial GraphicsAndText_ExtractionListener definition, but present in PdfCanvasProcessor.
• graphics_data_extractions: The primary goal is to extract graphical primitives (lines, curves, rectangles, etc.) from PDF content.
• pagewises: The extraction and processing are done on a page-by-page basis.
In essence, this namespace encapsulates a custom PDF content stream processing logic, specifically designed for extracting and transforming graphic and text data from PDF pages, likely for export into a CAD format like DXF, with explicit handling of PDF's internal graphics state and coordinate transformations. The "saan" prefix implies a personal or internal development effort.
________________________________________
Class: GraphicsAndText_ExtractionListener
This class is the concrete implementation of the IEventListener interface, which is part of a PDF parsing framework (like iText). Its role is to listen for and process various rendering events as a PdfCanvasProcessor reads through a PDF's content stream.
Core Responsibilities and Functionality:
1. Event Listening (IEventListener):
o It implements the IEventListener interface, specifically the EventOccurred method, which is the callback invoked by PdfCanvasProcessor whenever a rendering event (like drawing a path, showing text, saving graphics state) occurs.
o It also implements GetSupportedEvents(), which tells the PdfCanvasProcessor which types of events this listener is interested in. In the provided code, it supports BEGIN_TEXT, RENDER_PATH, RENDER_TEXT, and END_TEXT. (Interestingly, it has commented-out lines for SAVE_GRAPHICS_STATE, RESTORE_GRAPHICS_STATE, MODIFY_CTM, which suggests these were considered or implemented at some point).
2. Data Storage:
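o graphicsData (List<string>): Stores raw graphical path details.
o ListOfStringAsTextDataOnlys (List<string>): Stores extracted text details.
o dxfData (List<string>): Stores DXF-formatted output strings.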
3. Page Information Management:
o currentPageNumber, currentPageWidth, currentPageHeight: Stores details about the currently processed PDF page, set by the SetPageInfo method. This is essential for applying page-wise offsets for DXF export.
4. Transformation Matrix Handling:
o currentTransformationMatrix, currentSAANTextTransformationMatrix: While explicitly declared, their direct manipulation within this specific listener is not fully evident in the provided snippets. However, the TransformPoint helper methods use the renderInfo.GetCtm() (Current Transformation Matrix) to convert points from PDF's internal coordinate system to a consistent output system (which then gets the offsetspageswises added).
5. Event Processing Logic (EventOccurred method):
o RENDER_PATH: When a graphic path (lines, curves, rectangles) is rendered:
It casts the IEventData to PathRenderInfo.
Calls GetPathDetails to format a descriptive string of the path, adding it to graphicsData.
Calls AddToDxfData to generate corresponding DXF LINE or ARC/SPLINE (approximated by lines) entities, applying the offsetspageswises for multi-page export. It attempts to classify paths as "CLOSED," "OPEN," or "LINEAR," and sets DXF layer names and ACI colors based on graphic state properties.
o RENDER_TEXT: When text is rendered:
It casts the IEventData to TextRenderInfo.
Calls GetTextDetails to extract text content, its matrix, font size, baseline, etc., and adds it to ListOfStringAsTextDataOnlys.
Calls AddTextContentsToToDxfData to generate DXF TEXT entities, including position, rotation, font height, color, and layer information. It also adds a debug "SANJOYNATH" text entity, which is interesting.
6. Helper Methods for Extraction and Transformation:
o GetPathDetails: Formats path information for human readability.
o GetTextDetails: Formats text rendering information.
o AddToDxfData: Generates DXF LINE and LWPOLYLINE (for approximated Bezier curves) entries. It attempts to deduce DXF layer names from PDF graphics state attributes (line width, color, dash pattern, join/cap styles). It also includes logic to close paths if needed for DXF polygon representation.
o AddTextContentsToToDxfData: Generates DXF TEXT entities. It tries to get font details, color, and calculates the rotation angle from the text's baseline. It maps PDF RGB colors to AutoCAD Color Index (ACI) using GetClosestAciColor.
o TransformPoint: A crucial utility that applies a PDF CTM (Current Transformation Matrix) to a point, converting it to the desired output coordinate system before applying the page offset.
o CalculateBezierPoint: Implements the cubic Bezier curve formula to approximate Bezier curves with multiple line segments for DXF export.
o PointsEqual: Helper for checking point equality with tolerance.
o GetClosestAciColor: A utility to map RGB colors found in PDF to AutoCAD Color Index (ACI) colors for DXF compatibility.
7. Output Methods:
o WriteGraphicsData: Writes the raw graphicsData (including text data appended) to a plain text file.
o WriteDxfData: Assembles the DXF header, entities (from dxfData), and footer, then writes the complete DXF content to a .dxf file.
In summary, GraphicsAndText_ExtractionListener acts as a specialized PDF event handler that captures raw graphical and textual elements, processes their properties and transformations, and then converts them into a structured format (DXF) suitable for CAD applications, applying page-wise offsets for multi-page document aggregation. It's a key component in a PDF-to-CAD conversion tool. The extensive comments and logging within the provided code also suggest a focus on understanding the sequence and details of PDF rendering operations.
Question: the code I have written places the broken (fragmented) texts in the DXF files, but I need to implement the following:
using System;
using System.Collections.Generic;
using System.Text;
using DETA7.Commons.Utils;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf.Canvas.Parser;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
namespace DETA7.Kernel.Pdf.Canvas.Parser.Listener
{
/// <summary>
/// Text listener that concatenates rendered text in the order the render events arrive,
/// inserting a line break when a chunk leaves the previous baseline and a blank when
/// two chunks on the same baseline are visibly separated.
/// </summary>
public class SimpleTextExtractionStrategy : DETA7ExtractionStrategy
{
    /// <summary>Baseline start point of the previously rendered chunk.</summary>
    public Vector lastStart;
    /// <summary>Baseline end point of the previously rendered chunk.</summary>
    public Vector lastEnd;
    /// <summary>Buffer holding the text extracted so far.</summary>
    public StringBuilder result = new StringBuilder();

    /// <summary>Handles one parser event; only RENDER_TEXT events have any effect.</summary>
    /// <param name="data">Event payload; cast to TextRenderInfo for text events.</param>
    /// <param name="type">Kind of event being reported.</param>
    public virtual void EventOccurred(IEventData data, EventType type)
    {
        if (!type.Equals(EventType.RENDER_TEXT))
        {
            return;
        }

        TextRenderInfo renderInfo = (TextRenderInfo)data;
        bool isFirstChunk = result.Length == 0;
        LineSegment baseline = renderInfo.GetBaseline();
        Vector start = baseline.GetStartPoint();
        Vector end = baseline.GetEndPoint();

        bool hardReturn = false;
        if (!isFirstChunk)
        {
            // Squared point-to-line distance between this chunk's start and the previous baseline,
            // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
            Vector previousDirection = lastEnd.Subtract(lastStart);
            float dist = previousDirection.Cross(lastStart.Subtract(start)).LengthSquared() / previousDirection.LengthSquared();
            // A fixed tolerance of 1 is used rather than anything derived from font metrics.
            const float sameLineThreshold = 1f;
            hardReturn = dist > sameLineThreshold;
        }

        // Only the start position is compared; a pure change of angle without displacement is not detected.
        if (hardReturn)
        {
            AppendTextChunk("\n");
        }
        else if (!isFirstChunk)
        {
            // A blank is only a candidate when neither side of the join already provides one.
            bool neitherSideHasSpace = result[result.Length - 1] != ' '
                && renderInfo.GetText().Length > 0
                && renderInfo.GetText()[0] != ' ';
            if (neitherSideHasSpace && lastEnd.Subtract(start).Length() > renderInfo.GetSingleSpaceWidth() / 2f)
            {
                AppendTextChunk(" ");
            }
        }

        AppendTextChunk(renderInfo.GetText());
        lastStart = start;
        lastEnd = end;
    }

    /// <summary>Reports the event types this listener wants to receive.</summary>
    /// <returns>An unmodifiable set containing only RENDER_TEXT.</returns>
    public virtual ICollection<EventType> GetSupportedEvents()
    {
        var onlyTextEvents = JavaCollectionsUtil.SingletonList(EventType.RENDER_TEXT);
        var eventSet = new LinkedHashSet<EventType>(onlyTextEvents);
        return JavaCollectionsUtil.UnmodifiableSet(eventSet);
    }

    /// <summary>Gives the text accumulated up to now.</summary>
    /// <returns>The contents of the result buffer as a string.</returns>
    public virtual String GetResultantText()
    {
        return result.ToString();
    }

    /// <summary>Appends text to the accumulated result.</summary>
    /// <remarks>
    /// Also usable for text that does not come from regular text parsing
    /// (for instance the outcome of OCR run against image content).
    /// </remarks>
    /// <param name="text">Text to add to the end of the result buffer.</param>
    public void AppendTextChunk(String text)
    {
        result.Append(text);
    }
}
} such that all the broken text fragments in the DXF file come out as a single string whenever the strategy detects that they belong to one string (whole words rather than separate characters). How can I implement that for the texts?
Thanks for sharing the code and context, Sanjoy. Based on your goal — to merge broken text fragments into a single coherent string when placing text into DXF files — here's a clear strategy to enhance your implementation:
________________________________________
🧩 Problem Summary
Currently, each TextRenderInfo instance is processed individually, and its text is placed separately in the DXF. This causes broken or fragmented text placement, especially when characters or words are rendered as separate events.
________________________________________
✅ Enhancement Strategy
You need to group text fragments that belong to the same line or logical unit before placing them into DXF. Here's how to do it:
________________________________________
🔧 Implementation Steps
1. Track and Group Text by Line
Create a structure to group TextRenderInfo objects that lie on the same line. You can use a List<List<TextRenderInfo>> or a dictionary keyed by Y-coordinate (with tolerance).
2. Group TextRenderInfo by Baseline Y
In your EventOccurred method, when EventType.RENDER_TEXT is triggered:
3. Sort and Merge Texts
After processing the page, sort each line’s fragments by X-coordinate and merge them:
4. Update DXF Placement
Replace AddTextContentsToToDxfData logic to accept the merged string and use the first fragment’s position:
________________________________________
🧠 Optional Enhancements
• Handle rotation and font size consistently across fragments.
• Detect paragraph breaks using vertical spacing.
• Use MCID or ActualText if available for semantic grouping.
________________________________________
Would you like me to generate a complete code snippet for this grouping and merging logic? Or integrate it directly into your existing GraphicsAndText_ExtractionListener class?
// Fragments grouped per text line; the key is the baseline Y of the first fragment seen on that line.
Dictionary<float, List<TextRenderInfo>> textLines = new Dictionary<float, List<TextRenderInfo>>();
float lineTolerance = 1.0f; // Adjust based on font size or resolution
TextRenderInfo renderInfo = (TextRenderInfo)data;
float y = renderInfo.GetBaseline().GetStartPoint().Get(Vector.I2);
// Find matching line within tolerance.
// A nullable is used instead of FirstOrDefault: default(float) is 0, which is itself a valid
// baseline Y, so "no match" and "matched the line at y = 0" could not be told apart.
float? matchedY = null;
foreach (float existingY in textLines.Keys)
{
    if (Math.Abs(existingY - y) < lineTolerance)
    {
        matchedY = existingY;
        break;
    }
}
if (matchedY.HasValue)
{
    textLines[matchedY.Value].Add(renderInfo);
}
else
{
    textLines[y] = new List<TextRenderInfo> { renderInfo };
}
foreach (var line in textLines)
{
    // Left-to-right order along the line (by baseline start X).
    var sortedFragments = line.Value.OrderBy(t => t.GetBaseline().GetStartPoint().Get(Vector.I1)).ToList();
    string mergedText = string.Join("", sortedFragments.Select(t => t.GetText()));
    // Use the first fragment's position for DXF placement
    var first = sortedFragments.First();
    AddMergedTextToDxf(first, mergedText);
}
/// <summary>
/// Placeholder for writing one DXF text entity for a merged line of text.
/// The body is intentionally empty here; only the intended steps are listed.
/// </summary>
/// <param name="baseInfo">Fragment whose baseline start point is meant to position the entity.</param>
/// <param name="mergedText">Text content meant to be written into the entity.</param>
private void AddMergedTextToDxf(TextRenderInfo baseInfo, string mergedText)
{
// Use baseInfo.GetBaseline().GetStartPoint() for position
// Use mergedText for content
// Generate DXF entity as usual
}
To solve the broken text issue where individual characters or glyphs are being rendered as separate TextRenderInfo events (common in PDFs), you need to implement intelligent text chunk merging logic based on positioning heuristics.
The SimpleTextExtractionStrategy you've pasted works well for standard paragraph text, but in your DXF context (especially from vectorized or CAD-like PDF exports), text often appears as individually placed characters, not as grouped words.
________________________________________
✅ Problem:
• Your current DXF writer outputs each character (from individual TextRenderInfo) as an isolated entity.
• Words like "COLUMN" are being broken into 'C', 'O', 'L', etc., each placed individually.
________________________________________
✅ Solution Outline:
1. Accumulate a list of TextChunk objects (with text, position, font, size).
2. Use horizontal position & direction to merge adjacent characters into a full word.
3. Respect line breaks using a vertical threshold.
________________________________________
✅ C# Enhancement: Improved DXF-Compatible Text Strategy
Here is how you can build a smarter TextExtractionStrategy:
csharp
CopyEdit
/// <summary>
/// Event listener that records every rendered text fragment and afterwards merges the
/// fragments into lines of text, so that text drawn glyph-by-glyph can be reassembled.
/// </summary>
/// <remarks>
/// Grouping compares only X/Y of the baseline start and uses the baseline length as width,
/// so it is meant for horizontal text. The angle is recorded per fragment but not used.
/// </remarks>
public class DxfTextGroupingStrategy : IEventListener
{
    private readonly List<TextChunk> chunks = new List<TextChunk>();
    private readonly float lineThreshold;
    private readonly float spaceGapThreshold;

    /// <summary>Creates a strategy with the default tolerances (2.0 for lines, 1.0 for word gaps).</summary>
    public DxfTextGroupingStrategy() : this(2.0f, 1.0f)
    {
    }

    /// <summary>Creates a strategy with explicit tolerances.</summary>
    /// <param name="lineThreshold">Maximum difference in baseline Y for two fragments to share a line.</param>
    /// <param name="spaceGapThreshold">Horizontal gap above which a blank is inserted between fragments.</param>
    public DxfTextGroupingStrategy(float lineThreshold, float spaceGapThreshold)
    {
        this.lineThreshold = lineThreshold;
        this.spaceGapThreshold = spaceGapThreshold;
    }

    /// <summary>Stores position, width, angle and text of each RENDER_TEXT event.</summary>
    /// <param name="data">Event payload; cast to TextRenderInfo for text events.</param>
    /// <param name="type">Kind of event being reported.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT)
        {
            return;
        }

        var renderInfo = (TextRenderInfo)data;
        string text = renderInfo.GetText();
        if (string.IsNullOrEmpty(text))
        {
            // Nothing to merge; an empty fragment would only distort the gap measurement.
            return;
        }

        Vector start = renderInfo.GetBaseline().GetStartPoint();
        Vector end = renderInfo.GetBaseline().GetEndPoint();
        chunks.Add(new TextChunk
        {
            Text = text,
            X = start.Get(0),
            Y = start.Get(1),
            Width = end.Subtract(start).Length(),
            Angle = (float)Math.Atan2(end.Get(1) - start.Get(1), end.Get(0) - start.Get(0))
        });
    }

    /// <summary>Reports the event types this listener wants to receive.</summary>
    /// <returns>A set containing only RENDER_TEXT.</returns>
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> { EventType.RENDER_TEXT };
    }

    /// <summary>Merges the recorded fragments into lines of text.</summary>
    /// <returns>
    /// One string per detected line, ordered from the largest baseline Y to the smallest;
    /// within a line the fragments are joined from the smallest X to the largest.
    /// </returns>
    public List<string> GetMergedLines()
    {
        var sorted = chunks.OrderByDescending(c => c.Y).ThenBy(c => c.X).ToList();
        var lines = new List<List<TextChunk>>();
        foreach (var chunk in sorted)
        {
            // A fragment joins the first line whose first fragment lies within the Y tolerance.
            var line = lines.FirstOrDefault(l => Math.Abs(l[0].Y - chunk.Y) < lineThreshold);
            if (line == null)
            {
                line = new List<TextChunk>();
                lines.Add(line);
            }
            line.Add(chunk);
        }

        var mergedLines = new List<string>(lines.Count);
        foreach (var line in lines)
        {
            var ordered = line.OrderBy(c => c.X).ToList();
            var sb = new StringBuilder();
            for (int i = 0; i < ordered.Count; i++)
            {
                if (i > 0)
                {
                    float gap = ordered[i].X - (ordered[i - 1].X + ordered[i - 1].Width);
                    // Avoid doubled blanks when a neighbouring fragment already carries the space.
                    bool previousEndsWithSpace = sb.Length > 0 && sb[sb.Length - 1] == ' ';
                    bool currentStartsWithSpace = ordered[i].Text[0] == ' ';
                    if (gap > spaceGapThreshold && !previousEndsWithSpace && !currentStartsWithSpace)
                    {
                        sb.Append(' ');
                    }
                }
                sb.Append(ordered[i].Text);
            }
            mergedLines.Add(sb.ToString());
        }
        return mergedLines;
    }

    /// <summary>One recorded text fragment.</summary>
    public class TextChunk
    {
        /// <summary>Text of the fragment.</summary>
        public string Text;
        /// <summary>X of the baseline start point.</summary>
        public float X;
        /// <summary>Y of the baseline start point.</summary>
        public float Y;
        /// <summary>Length of the baseline.</summary>
        public float Width;
        /// <summary>Baseline direction from Math.Atan2, in radians.</summary>
        public float Angle;
    }
}
________________________________________
✅ Usage
csharp
CopyEdit
// Collect the text fragments of one page and print the merged lines.
var strategy = new DxfTextGroupingStrategy();
PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);
// The processor entry point for a page is ProcessPageContent (ProcessPage does not exist on PdfCanvasProcessor).
processor.ProcessPageContent(page); // your iText page
var lines = strategy.GetMergedLines();
foreach (var line in lines)
{
    Console.WriteLine(line); // Replace with DXF text writer
}
________________________________________
✅ What This Fixes:
• Broken text like 'B', 'E', 'A', 'M' gets reconstructed into "BEAM".
• Letters placed at small consistent X-gap are considered part of the same word.
• Texts on the same Y-line (with some tolerance) are grouped as a line.
• Spaces are inserted if horizontal gaps exceed a threshold.
________________________________________
Would you like:
• A version that also handles rotated text (beta angle aware)?
• A DXF export module that takes this grouped output and writes MText entities?
Let me know how you want to wire this into your listener pipeline.
Improving Text Extraction
I'm currently focused on improving text extraction from PDF to DXF. My goal is to group individual characters or small text chunks, as they're rendered in the PDF, and convert them to more coherent DXF TEXT entities. This is proving tricky, as determining logical groupings requires some clever pattern recognition. I'm investigating different algorithms, and I'm hopeful I'll find something that makes this conversion smarter.
Refining Grouping Logic
I've been wrestling with the core problem: how to buffer and analyze TextRenderInfo objects to create logical DXF TEXT entities. The existing strategy, while functional, lacks the sophistication needed for accurate text grouping. I'm thinking of creating a new TextExtractionStrategy specifically tailored for DXF export. This strategy will buffer TextRenderInfo, apply spatial analysis to detect word breaks and phrases, and then assemble logical text blocks before handing them off to the DXF export logic. I'm sketching out the spatial analysis algorithms now.
Designing a New Approach
I'm now devising a new plan, starting with a TextChunk data structure to hold text, bounding box info, and font details. I'll need to buffer TextRenderInfo objects and analyze them for proximity to form logical text blocks. A ProcessPageTextsForDxf() method, called after page processing, will handle this grouping. Sorting by position will begin the text block forming.
Devising Grouping Rules
I'm now diving into the finer points of text grouping. The main challenge is creating rules to handle variable spacing and fonts. I will need to refine the proximity checks, making them sensitive to font size and style. The baseline distance logic needs adjustments for handling varying line spacing. I'm focusing on creating robust, context-aware rules to identify logical word boundaries even in tricky scenarios.
Formulating Grouping Algorithms
I've been meticulously designing algorithms for logical text block formation. The focus is now on the core grouping logic within ProcessPageTextsForDxf(). I'm exploring distance calculations, font and style comparisons, and line break detection strategies. The goal is to create rules that reliably combine fragmented text elements into coherent DXF TEXT entities. I'm particularly interested in handling varying character spacing and different font styles within the same block.
Forming the Text Blocks
I've been working on the core logic: refining the ProcessPageTextsForDxf method. This involves robust sorting and proximity checks. I'm focusing on creating accurate line breaks and word boundaries, factoring in character spacing and font variations. I'm aiming to produce single DXF TEXT entities from fragmented TextRenderInfo objects.
Formulating the Implementation
I've outlined the core data structure (TextChunk) and the modification to GraphicsAndText_ExtractionListener to buffer and process text data. The next step is fleshing out ProcessPageTextsForDxf with the sorting and grouping algorithms. I'll need to define clear thresholds for line breaks and word spacing based on font size and character dimensions. The design now prioritizes a clear algorithm, with robust rules for creating a single DXF TEXT for each block.
The problem you're facing is common in PDF text extraction: PDF content streams often render text character by character, or in small groups, rather than as complete words or sentences. This results in "broken texts" when directly mapping TextRenderInfo objects to DXF entities.
Your provided SimpleTextExtractionStrategy correctly identifies the need to group text on the same line and insert spaces, but it's designed for plain text output (StringBuilder result). We need to adapt this logic for generating DXF entities.
The solution involves a two-phase approach within your GraphicsAndText_ExtractionListener:
1. Collection Phase: During the EventOccurred loop, collect all TextRenderInfo objects for the current page.
2. Processing/Grouping Phase: After all TextRenderInfo objects for a page have been collected, a new method will sort these chunks, analyze their spatial relationships, and intelligently group them into logical text blocks (words/phrases/lines) before generating a single DXF TEXT entity for each logical block.
Here's the refined C# code.
C#
using System;
using System.Collections.Generic;
using System.Linq; // For OrderBy, FirstOrDefault
using System.Text;
using DETA7.Commons.Utils;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Font;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Canvas;
using DETA7.Kernel.Pdf.Canvas.Parser;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
using DETA7.Kernel.Pdf.Canvas.Parser.Listener; // Assuming this namespace exists
// Add other necessary DETA7 namespaces based on your full project setup
namespace saan_trying_pathspositioningcaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
{
// A custom data structure to hold a logical block of text
/// <summary>
/// A run of text assembled from one or more TextRenderInfo fragments.
/// Position, font and colour are taken from the first fragment; later fragments
/// extend the text and move the end point.
/// </summary>
public class LogicalTextChunk
{
    /// <summary>Page number passed in at construction.</summary>
    public int PageNumber { get; set; }
    /// <summary>Combined text of all fragments appended so far.</summary>
    public StringBuilder Text { get; set; } = new StringBuilder();
    /// <summary>Baseline start point of the first fragment.</summary>
    public Vector StartPoint { get; set; }
    /// <summary>Baseline end point of the most recently appended fragment.</summary>
    public Vector EndPoint { get; set; }
    /// <summary>Font size reported by the first fragment.</summary>
    public float FontSize { get; set; }
    /// <summary>Direction of the first fragment's baseline, in degrees.</summary>
    public float RotationAngle { get; set; }
    /// <summary>Font name reported by the first fragment's font program.</summary>
    public string FontName { get; set; }
    /// <summary>Fill colour reported by the first fragment.</summary>
    public Color FillColor { get; set; }

    /// <summary>Starts a chunk from its first fragment.</summary>
    /// <param name="initialRenderInfo">Fragment supplying text, baseline, font and colour.</param>
    /// <param name="pageNumber">Page the fragment belongs to.</param>
    public LogicalTextChunk(TextRenderInfo initialRenderInfo, int pageNumber)
    {
        PageNumber = pageNumber;
        Text.Append(initialRenderInfo.GetText());

        LineSegment firstBaseline = initialRenderInfo.GetBaseline();
        StartPoint = firstBaseline.GetStartPoint();
        EndPoint = firstBaseline.GetEndPoint();
        FontSize = initialRenderInfo.GetFontSize();
        RotationAngle = CalculateRotationAngle(firstBaseline);

        var fontNames = initialRenderInfo.GetFont().GetFontProgram().GetFontNames();
        FontName = fontNames.GetFontName();
        FillColor = initialRenderInfo.GetFillColor();
    }

    /// <summary>Adds a following fragment to this chunk.</summary>
    /// <param name="renderInfo">Fragment whose text is appended and whose baseline end becomes the new end point.</param>
    /// <param name="addSpace">When true a single blank is written before the fragment's text.</param>
    public void Append(TextRenderInfo renderInfo, bool addSpace)
    {
        if (addSpace)
        {
            Text.Append(' ');
        }
        Text.Append(renderInfo.GetText());
        EndPoint = renderInfo.GetBaseline().GetEndPoint();
    }

    /// <summary>Computes the direction of a baseline via Math.Atan2, converted to degrees.</summary>
    private float CalculateRotationAngle(LineSegment baseline)
    {
        Vector from = baseline.GetStartPoint();
        Vector to = baseline.GetEndPoint();
        float run = to.Get(Vector.I1) - from.Get(Vector.I1);
        float rise = to.Get(Vector.I2) - from.Get(Vector.I2);
        double radians = Math.Atan2(rise, run);
        return (float)(radians * (180.0 / Math.PI));
    }
}
public class GraphicsAndText_ExtractionListener : IEventListener
{
// Path descriptions produced by GetPathDetails, one entry per RENDER_PATH event; dumped by WriteGraphicsData.
private readonly List<string> graphicsData = new List<string>();
// Text descriptions produced by GetTextDetails, one entry per RENDER_TEXT event; appended after graphicsData in WriteGraphicsData.
private readonly List<string> ListOfStringAsTextDataOnlys = new List<string>(); // For raw text string details
// DXF entity strings (LINE and TEXT) written between header and footer by WriteDxfData.
private readonly List<string> dxfData = new List<string>(); // Final DXF entities for graphics and grouped text
// Text events collected since the last SetPageInfo call; consumed and cleared by ProcessPageTextsForDxf.
private readonly List<TextRenderInfo> currentPageTextRenderInfos = new List<TextRenderInfo>(); // Buffer for texts on current page
// Page number and page size supplied through SetPageInfo.
private int currentPageNumber;
private float currentPageWidth;
private float currentPageHeight;
// Assuming these are static or managed by PdfCanvasProcessor if they represent CTM of operations
// private Matrix currentTransformationMatrix = new Matrix();
// private Matrix currentSAANTextTransformationMatrix = new Matrix();
/// <summary>Records which page is about to be processed and empties the text buffer.</summary>
/// <param name="pageNumber">Number of the page whose events will follow.</param>
/// <param name="pageWidth">Width of that page.</param>
/// <param name="pageHeight">Height of that page.</param>
public void SetPageInfo(int pageNumber, float pageWidth, float pageHeight)
{
    // Text buffered for an earlier page must not leak into this one.
    currentPageTextRenderInfos.Clear();

    currentPageHeight = pageHeight;
    currentPageWidth = pageWidth;
    currentPageNumber = pageNumber;
}
/// <summary>
/// Receives parser events. Text events are logged and buffered for later grouping;
/// path events are logged and turned into DXF entities right away. Other events are ignored.
/// </summary>
/// <param name="data">Event payload (TextRenderInfo or PathRenderInfo for the handled types).</param>
/// <param name="type">Kind of event being reported.</param>
public void EventOccurred(IEventData data, EventType type)
{
    if (type == EventType.RENDER_PATH)
    {
        var pathInfo = (PathRenderInfo)data;
        graphicsData.Add(GetPathDetails(pathInfo));
        AddToDxfData(pathInfo, currentPageNumber);
        return;
    }

    if (type != EventType.RENDER_TEXT)
    {
        return;
    }

    var textInfo = (TextRenderInfo)data;
    // Keep the raw description for the text dump, and defer DXF output until the page's text is grouped.
    ListOfStringAsTextDataOnlys.Add(GetTextDetails(textInfo));
    currentPageTextRenderInfos.Add(textInfo);
}
/// <summary>Reports the event types this listener wants to receive.</summary>
/// <returns>A set holding RENDER_PATH and RENDER_TEXT.</returns>
public ICollection<EventType> GetSupportedEvents()
{
    var supported = new HashSet<EventType>();
    supported.Add(EventType.RENDER_PATH);
    supported.Add(EventType.RENDER_TEXT);
    return supported;
}
/// <summary>
/// Groups the text fragments buffered for the current page into logical chunks and adds one
/// DXF TEXT entity per chunk to the DXF data, then empties the buffer.
/// Call this after the page's content has been processed.
/// </summary>
/// <remarks>
/// Fragments are first bucketed into lines (baseline Y within half the average font size of the
/// line's first fragment), and only then ordered by X inside each line. Sorting the whole page by
/// exact Y and then X would interleave fragments whose baselines differ by a fraction of a unit.
/// Within a line a new chunk is started when the horizontal gap exceeds a multiple of the font
/// size, so labels that merely share a baseline are not fused into one entity.
/// Only X/Y of the baselines are compared, so the grouping is meant for horizontal text.
/// </remarks>
public void ProcessPageTextsForDxf()
{
    if (currentPageTextRenderInfos.Count == 0)
    {
        return;
    }

    const float sameLineFactor = 0.5f;   // same line: |dy| below half the font size
    const float spaceFactor = 0.3f;      // implied blank: gap above 30% of the font size
    const float chunkBreakFactor = 2.0f; // heuristic: gap above two font sizes starts a new chunk

    // 1. Bucket fragments into lines, walking from the largest baseline Y to the smallest.
    var lines = new List<List<TextRenderInfo>>();
    foreach (var renderInfo in currentPageTextRenderInfos
        .OrderByDescending(t => t.GetBaseline().GetStartPoint().Get(Vector.I2)))
    {
        if (lines.Count > 0)
        {
            List<TextRenderInfo> openLine = lines[lines.Count - 1];
            TextRenderInfo anchor = openLine[0];
            float avgFontSize = (anchor.GetFontSize() + renderInfo.GetFontSize()) / 2.0f;
            float verticalDistance = Math.Abs(
                renderInfo.GetBaseline().GetStartPoint().Get(Vector.I2) -
                anchor.GetBaseline().GetStartPoint().Get(Vector.I2));
            if (verticalDistance < avgFontSize * sameLineFactor)
            {
                openLine.Add(renderInfo);
                continue;
            }
        }
        lines.Add(new List<TextRenderInfo> { renderInfo });
    }

    // 2. Inside each line go left to right and merge neighbouring fragments.
    foreach (var line in lines)
    {
        LogicalTextChunk currentLogicalChunk = null;
        foreach (var renderInfo in line.OrderBy(t => t.GetBaseline().GetStartPoint().Get(Vector.I1)))
        {
            if (currentLogicalChunk == null)
            {
                currentLogicalChunk = new LogicalTextChunk(renderInfo, currentPageNumber);
                continue;
            }

            float avgFontSize = (currentLogicalChunk.FontSize + renderInfo.GetFontSize()) / 2.0f;
            float horizontalDistance =
                renderInfo.GetBaseline().GetStartPoint().Get(Vector.I1) -
                currentLogicalChunk.EndPoint.Get(Vector.I1);

            if (horizontalDistance > avgFontSize * chunkBreakFactor)
            {
                // Too far to the right to be the same word or phrase: emit and start over.
                AddLogicalTextChunkToDxfData(currentLogicalChunk, currentPageNumber);
                currentLogicalChunk = new LogicalTextChunk(renderInfo, currentPageNumber);
                continue;
            }

            // Insert a blank only when the gap suggests one and neither side already has it.
            string fragmentText = renderInfo.GetText();
            StringBuilder soFar = currentLogicalChunk.Text;
            bool shouldAddSpace =
                horizontalDistance > avgFontSize * spaceFactor &&
                soFar.Length > 0 && soFar[soFar.Length - 1] != ' ' &&
                fragmentText.Length > 0 && fragmentText[0] != ' ';
            currentLogicalChunk.Append(renderInfo, shouldAddSpace);
        }

        if (currentLogicalChunk != null)
        {
            AddLogicalTextChunkToDxfData(currentLogicalChunk, currentPageNumber);
        }
    }

    currentPageTextRenderInfos.Clear(); // All texts processed for this page
}
// --- Helper Methods (Copied from your original file, potentially with minor adjustments) ---
/// <summary>
/// Builds a multi-line description of a path event: page info, operation, rule, and the
/// transformed end points of every line and control points of every Bezier curve.
/// </summary>
/// <param name="renderInfo">Path event to describe.</param>
/// <returns>The description text.</returns>
private string GetPathDetails(PathRenderInfo renderInfo)
{
    var report = new StringBuilder();
    report.AppendLine($"Page Number: {currentPageNumber}, Page Width: {currentPageWidth}, Page Height: {currentPageHeight}");
    report.AppendLine("Path Details:");
    report.AppendLine($"Operation: {renderInfo.GetOperation()}");
    report.AppendLine($"Rule: {renderInfo.GetRule()}");

    foreach (Subpath subpath in renderInfo.GetPath().GetSubpaths())
    {
        foreach (IShape shape in subpath.GetSegments())
        {
            switch (shape)
            {
                case Line segment:
                {
                    float[] from = TransformPoint(segment.p1, renderInfo.GetCtm());
                    float[] to = TransformPoint(segment.p2, renderInfo.GetCtm());
                    report.AppendLine($"Line: Start({from[0]}, {from[1]}) - End({to[0]}, {to[1]})");
                    break;
                }
                case BezierCurve bezier:
                {
                    int index = 0;
                    foreach (DETA7.Kernel.Geom.Point controlPoint in bezier.controlPoints)
                    {
                        float[] mapped = TransformPoint(controlPoint, renderInfo.GetCtm());
                        report.AppendLine($"Curve: Control Point {index} - x: {mapped[0]}, y: {mapped[1]}");
                        index++;
                    }
                    break;
                }
            }
        }
    }
    return report.ToString();
}
/// <summary>
/// Builds a multi-line description of a text event: text, text matrix, unscaled width,
/// baseline start and end, font size and text rise.
/// </summary>
/// <param name="FoundTextRenderInfo">Text event to describe.</param>
/// <returns>The description text, one "name = value" pair per line.</returns>
private string GetTextDetails(TextRenderInfo FoundTextRenderInfo)
{
    LineSegment baseline = FoundTextRenderInfo.GetBaseline();
    var report = new StringBuilder();
    report.Append("Text = ").AppendLine(FoundTextRenderInfo.GetText());
    report.Append("Text Matrix = ").AppendLine(FoundTextRenderInfo.GetTextMatrix().ToString());
    report.Append("Unscaled Width = ").AppendLine(FoundTextRenderInfo.GetUnscaledWidth().ToString());
    report.Append("Baseline Start = ").AppendLine(baseline.GetStartPoint().ToString());
    report.Append("Baseline End = ").AppendLine(baseline.GetEndPoint().ToString());
    report.Append("Font Size = ").AppendLine(FoundTextRenderInfo.GetFontSize().ToString());
    report.Append("Text Rise = ").AppendLine(FoundTextRenderInfo.GetRise().ToString());
    return report.ToString();
}
/// <summary>
/// Appends one DXF TEXT entity for a grouped text chunk to the DXF data.
/// </summary>
/// <remarks>
/// The chunk's start point is used as insertion point as-is, shifted in X by 30000 units per
/// preceding page. Numbers are written with the invariant culture because DXF needs '.' as the
/// decimal separator whatever the machine's locale is.
/// </remarks>
/// <param name="logicalChunk">Chunk supplying text, position, height, rotation, font name and colour.</param>
/// <param name="pageNumber">1-based page number used for the horizontal page offset.</param>
private void AddLogicalTextChunkToDxfData(LogicalTextChunk logicalChunk, int pageNumber)
{
    float offsetspageswises = 30000 * (pageNumber - 1);
    Vector startPoint = logicalChunk.StartPoint;
    float x = startPoint.Get(Vector.I1);
    float y = startPoint.Get(Vector.I2);

    // A DXF group value occupies exactly one line, so line breaks inside the text are flattened.
    string text = logicalChunk.Text.ToString().Replace("\r", " ").Replace("\n", " ");

    float rotationAngle = logicalChunk.RotationAngle;
    float fontHeight = logicalChunk.FontSize; // Simplified: PDF font size used as DXF text height
    string fontName = logicalChunk.FontName;
    Color fillColor = logicalChunk.FillColor;
    string color = fillColor != null ? fillColor.ToString() : "NoColor";

    // Colours with an unsupported component count keep the previous fallback, ACI 6 (magenta).
    int aciColor = 6;
    if (TryGetRgb255(fillColor != null ? fillColor.GetColorValue() : null, out int r, out int g, out int b))
    {
        aciColor = GetClosestAciColor(r, g, b);
    }

    // Create DXF-safe layer name from properties of the logical chunk
    string layerName = FormattableString.Invariant($"{fontHeight:F0}_{rotationAngle:F0}_{fontName}_{color}");
    layerName = System.Text.RegularExpressions.Regex.Replace(layerName, @"[^a-zA-Z0-9_]", "_")
        .Replace("DETA7_Kernel_Colors_", ""); // Clean up namespace artifact

    // Group codes: 0=TEXT, 8=layer, 10/20/30=insertion point, 40=height, 50=rotation (degrees),
    // 62=ACI colour number, 1=text string.
    string dxfTextEntity = FormattableString.Invariant(
        $"0\nTEXT\n8\n{layerName}\n10\n{offsetspageswises + x}\n20\n{y}\n30\n0.0\n40\n{fontHeight}\n50\n{rotationAngle}\n62\n{aciColor}\n1\n{text}");
    dxfData.Add(dxfTextEntity);

    // Converts colour components in 0..1 to 0..255 RGB by component count.
    // NOTE(review): one component is read as a gray level and four as CMYK; confirm this
    // for non-device colour spaces that happen to have the same component counts.
    bool TryGetRgb255(float[] components, out int red, out int green, out int blue)
    {
        red = green = blue = 0;
        if (components == null)
        {
            return false;
        }
        switch (components.Length)
        {
            case 1:
                red = green = blue = (int)(components[0] * 255);
                return true;
            case 3:
                red = (int)(components[0] * 255);
                green = (int)(components[1] * 255);
                blue = (int)(components[2] * 255);
                return true;
            case 4:
                float keep = 1f - components[3];
                red = (int)((1f - components[0]) * keep * 255);
                green = (int)((1f - components[1]) * keep * 255);
                blue = (int)((1f - components[2]) * keep * 255);
                return true;
            default:
                return false;
        }
    }
}
/// <summary>
/// Appends DXF LINE entities for every line segment of a path event, and for every Bezier
/// curve approximated by ten straight segments.
/// </summary>
/// <remarks>
/// The layer name is derived from line width, dash pattern, cap style, join style and stroke
/// colour. X coordinates are shifted by 30000 units per preceding page. Numbers are written with
/// the invariant culture because DXF needs '.' as the decimal separator.
/// NOTE(review): every path event is exported without looking at renderInfo.GetOperation();
/// presumably paths that are neither stroked nor filled end up in the DXF too — confirm.
/// </remarks>
/// <param name="renderInfo">Path event to export.</param>
/// <param name="pageNumber">1-based page number used for the horizontal page offset.</param>
private void AddToDxfData(PathRenderInfo renderInfo, int pageNumber)
{
    float offsetspageswises = 30000 * (pageNumber - 1);

    // Extract styling info
    float lineWidth = renderInfo.GetLineWidth();
    string lineType = renderInfo.GetLineDashPattern()?.ToString() ?? "Continuous";
    string lineCap = renderInfo.GetLineCapStyle().ToString();
    string lineJoin = renderInfo.GetLineJoinStyle().ToString();
    DETA7.Kernel.Colors.Color strokeColor = renderInfo.GetStrokeColor();
    string strokeColorName = strokeColor != null ? strokeColor.ToString() : "NoColor";

    // Colours with an unsupported component count keep the previous fallback, ACI 6 (magenta).
    int aciColor = 6;
    if (TryGetRgb255(strokeColor != null ? strokeColor.GetColorValue() : null, out int r, out int g, out int b))
    {
        aciColor = GetClosestAciColor(r, g, b);
    }

    string rawLayerName = FormattableString.Invariant($"{lineWidth}_{lineType}_{lineCap}_{lineJoin}_{strokeColorName}");
    string layerName = System.Text.RegularExpressions.Regex.Replace(rawLayerName, @"[^a-zA-Z0-9_]", "_")
        .Replace("DETA7_Kernel_Colors_", "");

    foreach (Subpath subpath in renderInfo.GetPath().GetSubpaths())
    {
        // Open subpaths are exported as drawn; no closing segment is added.
        foreach (IShape shape in subpath.GetSegments())
        {
            if (shape is Line line)
            {
                float[] start = TransformPoint(line.p1, renderInfo.GetCtm());
                float[] end = TransformPoint(line.p2, renderInfo.GetCtm());
                dxfData.Add(BuildLineEntity(start, end));
            }
            else if (shape is BezierCurve curve)
            {
                float[] start = TransformPoint(curve.controlPoints[0], renderInfo.GetCtm());
                float[] control1 = TransformPoint(curve.controlPoints[1], renderInfo.GetCtm());
                float[] control2 = TransformPoint(curve.controlPoints[2], renderInfo.GetCtm());
                float[] end = TransformPoint(curve.controlPoints[3], renderInfo.GetCtm());

                int numSegments = 10; // Number of line segments to approximate the curve
                float tStep = 1.0f / numSegments;
                float[] prevPoint = start;
                for (int i = 1; i <= numSegments; i++)
                {
                    float[] point = CalculateBezierPoint(i * tStep, start, control1, control2, end);
                    dxfData.Add(BuildLineEntity(prevPoint, point));
                    prevPoint = point;
                }
            }
        }
    }

    // Formats one LINE entity: 8=layer, 10/20/30=start, 11/21/31=end, 62=ACI colour number.
    string BuildLineEntity(float[] from, float[] to)
    {
        return FormattableString.Invariant(
            $"0\nLINE\n8\n{layerName}\n10\n{offsetspageswises + from[0]}\n20\n{from[1]}\n30\n0.0\n11\n{offsetspageswises + to[0]}\n21\n{to[1]}\n31\n0.0\n62\n{aciColor}");
    }

    // Converts colour components in 0..1 to 0..255 RGB by component count.
    // NOTE(review): one component is read as a gray level and four as CMYK; confirm this
    // for non-device colour spaces that happen to have the same component counts.
    bool TryGetRgb255(float[] components, out int red, out int green, out int blue)
    {
        red = green = blue = 0;
        if (components == null)
        {
            return false;
        }
        switch (components.Length)
        {
            case 1:
                red = green = blue = (int)(components[0] * 255);
                return true;
            case 3:
                red = (int)(components[0] * 255);
                green = (int)(components[1] * 255);
                blue = (int)(components[2] * 255);
                return true;
            case 4:
                float keep = 1f - components[3];
                red = (int)((1f - components[0]) * keep * 255);
                green = (int)((1f - components[1]) * keep * 255);
                blue = (int)((1f - components[2]) * keep * 255);
                return true;
            default:
                return false;
        }
    }
}
/// <summary>Tells whether two points coincide within a tolerance on each axis.</summary>
/// <param name="p1">First point.</param>
/// <param name="p2">Second point.</param>
/// <param name="tolerance">Exclusive upper bound for the per-axis difference.</param>
/// <returns>True when both the X and the Y difference are below the tolerance.</returns>
private bool PointsEqual(DETA7.Kernel.Geom.Point p1, DETA7.Kernel.Geom.Point p2, float tolerance = 0.01f)
{
    if (!(Math.Abs(p1.GetX() - p2.GetX()) < tolerance))
    {
        return false;
    }
    return Math.Abs(p1.GetY() - p2.GetY()) < tolerance;
}
/// <summary>Evaluates a cubic Bezier curve at parameter t.</summary>
/// <param name="t">Curve parameter; 0 yields p0 and 1 yields p3.</param>
/// <param name="p0">Start point as {x, y}.</param>
/// <param name="p1">First control point as {x, y}.</param>
/// <param name="p2">Second control point as {x, y}.</param>
/// <param name="p3">End point as {x, y}.</param>
/// <returns>A new two-element array {x, y} with the point on the curve.</returns>
private float[] CalculateBezierPoint(float t, float[] p0, float[] p1, float[] p2, float[] p3)
{
    float s = 1 - t;
    float tSquared = t * t;
    float sSquared = s * s;

    // Bernstein weights of the four points.
    float w0 = sSquared * s;
    float w1 = 3 * sSquared * t;
    float w2 = 3 * s * tSquared;
    float w3 = tSquared * t;

    return new float[]
    {
        w0 * p0[0] + w1 * p1[0] + w2 * p2[0] + w3 * p3[0],
        w0 * p0[1] + w1 * p1[1] + w2 * p2[1] + w3 * p3[1]
    };
}
/// <summary>Applies a transformation matrix to the X/Y components of a vector.</summary>
/// <remarks>
/// The point is treated as the row vector [x y 1] multiplied by the matrix, so
/// x' = I11*x + I21*y + I31 and y' = I12*x + I22*y + I32. The previous formula used
/// I12/I13 and I21/I23, which swaps the off-diagonal terms and drops the translation.
/// NOTE(review): assumes DETA7's Matrix keeps the layout in which I31/I32 hold the
/// translation and I13/I23 are 0 — confirm against the Matrix class.
/// </remarks>
/// <param name="point">Vector whose I1 and I2 components are the X and Y to transform.</param>
/// <param name="matrix">Transformation to apply.</param>
/// <returns>A new two-element array {x', y'}.</returns>
private float[] TransformPoint(Vector point, Matrix matrix)
{
    float x = point.Get(Vector.I1);
    float y = point.Get(Vector.I2);
    float transformedX = matrix.Get(Matrix.I11) * x + matrix.Get(Matrix.I21) * y + matrix.Get(Matrix.I31);
    float transformedY = matrix.Get(Matrix.I12) * x + matrix.Get(Matrix.I22) * y + matrix.Get(Matrix.I32);
    return new float[] { transformedX, transformedY };
}
/// <summary>Applies a transformation matrix to a point.</summary>
/// <remarks>
/// The point is treated as the row vector [x y 1] multiplied by the matrix, so
/// x' = I11*x + I21*y + I31 and y' = I12*x + I22*y + I32. The previous formula used
/// I12/I13 and I21/I23, which swaps the off-diagonal terms and drops the translation.
/// NOTE(review): assumes DETA7's Matrix keeps the layout in which I31/I32 hold the
/// translation and I13/I23 are 0 — confirm against the Matrix class.
/// </remarks>
/// <param name="point">Point to transform; its coordinates are narrowed to float.</param>
/// <param name="matrix">Transformation to apply.</param>
/// <returns>A new two-element array {x', y'}.</returns>
private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix)
{
    float x = (float)point.GetX();
    float y = (float)point.GetY();
    float transformedX = matrix.Get(Matrix.I11) * x + matrix.Get(Matrix.I21) * y + matrix.Get(Matrix.I31);
    float transformedY = matrix.Get(Matrix.I12) * x + matrix.Get(Matrix.I22) * y + matrix.Get(Matrix.I32);
    return new float[] { transformedX, transformedY };
}
/// <summary>
/// Picks, from a small table of nine AutoCAD Color Index entries (1..9), the one whose RGB
/// value has the smallest Euclidean distance to the given colour.
/// </summary>
/// <param name="r">Red component.</param>
/// <param name="g">Green component.</param>
/// <param name="b">Blue component.</param>
/// <returns>The index of the nearest table entry; on equal distance the lower index wins.</returns>
private int GetClosestAciColor(int r, int g, int b)
{
    // Partial ACI table, listed in ascending index order.
    var palette = new (int Aci, int R, int G, int B)[]
    {
        (1, 255, 0, 0),     // Red
        (2, 255, 255, 0),   // Yellow
        (3, 0, 255, 0),     // Green
        (4, 0, 255, 255),   // Cyan
        (5, 0, 0, 255),     // Blue
        (6, 255, 0, 255),   // Magenta
        (7, 255, 255, 255), // White
        (8, 128, 128, 128), // Gray
        (9, 192, 192, 192)  // Light Gray
    };

    int best = 7; // White unless something is closer
    double bestDistance = double.MaxValue;
    for (int i = 0; i < palette.Length; i++)
    {
        double dr = r - palette[i].R;
        double dg = g - palette[i].G;
        double db = b - palette[i].B;
        double distance = Math.Sqrt(dr * dr + dg * dg + db * db);
        if (distance < bestDistance)
        {
            bestDistance = distance;
            best = palette[i].Aci;
        }
    }
    return best;
}
/// <summary>
/// Writes the collected path descriptions followed by the collected text descriptions
/// to a text file, one list entry per written line.
/// </summary>
/// <param name="outputFilePath">Path of the file to create or overwrite.</param>
public void WriteGraphicsData(string outputFilePath)
{
    var dump = new List<string>(graphicsData.Count + ListOfStringAsTextDataOnlys.Count);
    dump.AddRange(graphicsData);
    dump.AddRange(ListOfStringAsTextDataOnlys);
    File.WriteAllLines(outputFilePath, dump);
}
/// <summary>
/// Writes a DXF file made of a fixed preamble (HEADER, TABLES with a single
/// layer "0", an empty BLOCKS section and the opening of ENTITIES), then the
/// accumulated dxfData entries, then a fixed trailer that closes ENTITIES, adds
/// an empty OBJECTS section and ends with EOF.
/// </summary>
/// <param name="outputFilePath">Path of the DXF file to write.</param>
public void WriteDxfData(string outputFilePath)
{
    // Group code / value pairs, one string per output line.
    string[] preamble =
    {
        "0", "SECTION", "2", "HEADER", "0", "ENDSEC",
        "0", "SECTION", "2", "TABLES",
        "0", "TABLE", "2", "LAYER", "70", "1",
        // Layer "0": colour 7, linetype CONTINUOUS.
        "0", "LAYER", "2", "0", "70", "0", "62", "7", "6", "CONTINUOUS",
        "0", "ENDTAB",
        "0", "ENDSEC",
        "0", "SECTION", "2", "BLOCKS", "0", "ENDSEC",
        "0", "SECTION", "2", "ENTITIES"
    };
    string[] trailer =
    {
        "0", "ENDSEC", "0", "SECTION", "2", "OBJECTS", "0", "ENDSEC", "0", "EOF"
    };

    var document = new List<string>(preamble);
    document.AddRange(dxfData);
    document.AddRange(trailer);
    File.WriteAllLines(outputFilePath, document);
}
}
// This class coordinates the PDF reading and listener
/// <summary>
/// Drives the extraction: opens a PDF, feeds every page through a
/// <c>PdfCanvasProcessor</c> bound to a <c>GraphicsAndText_ExtractionListener</c>,
/// then writes the listener's text dump and DXF output next to the input file.
/// </summary>
public class PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations
{
    /// <summary>
    /// Processes all pages of the given PDF and writes two files whose names are
    /// the input path with the suffixes "___dumping_pagewises_graphics_data.txt"
    /// and "___pagewises_graphics_data.dxf".
    /// </summary>
    /// <param name="selected_pdffilesnames">Path of the PDF file to read.</param>
    public static void publicstaticvoid___read_inputFilePath_dump_pagewise_graphics_data(string selected_pdffilesnames)
    {
        string inputFilePath = selected_pdffilesnames;
        string outputGraphicsLogPath = inputFilePath + "___dumping_pagewises_graphics_data.txt";
        string dxfFilePath = inputFilePath + "___pagewises_graphics_data.dxf";
        PdfReader reader = new PdfReader(inputFilePath);
        PdfDocument pdfDoc = new PdfDocument(reader);
        try
        {
            GraphicsAndText_ExtractionListener listener = new GraphicsAndText_ExtractionListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);
            int pageCount = pdfDoc.GetNumberOfPages();
            for (int i = 1; i <= pageCount; i++)
            {
                PdfPage page = pdfDoc.GetPage(i);
                var pageSize = page.GetPageSize();
                listener.SetPageInfo(i, pageSize.GetWidth(), pageSize.GetHeight());
                processor.ProcessPageContent(page);
                // AFTER processing all content for the page, group and generate DXF for texts
                listener.ProcessPageTextsForDxf();
            }
            listener.WriteGraphicsData(outputGraphicsLogPath);
            listener.WriteDxfData(dxfFilePath);
        }
        finally
        {
            // Close the document even when page processing or file writing throws,
            // so the document is not left open on failure.
            pdfDoc.Close();
        }
        Console.WriteLine("Graphics extraction and DXF generation completed successfully.");
        // System.Windows.Forms.MessageBox.Show("Graphics extraction and DXF generation completed successfully."); // Requires WinForms reference
    }
}
}
// NOTE: The `DETA7.Kernel.Pdf.Canvas.Parser` namespace and `PdfCanvasProcessor` class,
// as provided in your previous snippet, are assumed to be part of your existing library.
// The logging code within `PdfCanvasProcessor` is omitted here for brevity as it's not
// directly related to the text grouping logic, but it should remain in your source.
/*
namespace DETA7.Kernel.Pdf.Canvas.Parser
{
// ... (Your existing PdfCanvasProcessor class code with all its inner classes and methods)
// Make sure it still calls eventListener.EventOccurred for RENDER_TEXT.
}
*/
We need smarter text grouping, because engineering documents do not contain storybook-like running text. Instead, the texts are broken, often slanted, single characters that look contiguous when printed but are stored separately in the PDF. // We need the report of the text groups in a database-insertable format, with column names such as: page, font, color, thickness, text style, angle, and left-to-right baseline vector; the nearest broken texts are to be merged into a single text group. // Font style, font name, font color, glyph weight, slope, baseline vector direction, and the nearness/contiguity of chunks are all important when clubbing and grouping the texts, because these characteristics are crucial in engineering documents when clubbing text chunks together into groups. Strict note: foreach (var line in lines)
{
// This ordering is not sufficient for grouping text in engineering documents, because there the texts do not lie on single Y values. Instead, the sameness of the angles and of the baseline unit vectors, and the nearness of chunks (the text width falls within a threshold along the unit vector of the baseline), are more crucial for grouping the text chunks together.
var ordered = line.OrderBy(c => c.X).ToList(); ......} Please revise the Strategy class for the needs of engineering documents (the same Y on a page does not guarantee that texts are groupable). // saan generated this separately to get the text grouping strategy.
/// <summary>
/// Event listener that records every rendered text chunk and merges the chunks
/// into lines using only the Y and X of each chunk's baseline start point.
/// </summary>
/// <remarks>
/// Known limitation, as the class name says: the baseline angle is recorded but
/// not used for grouping, and font name, colour and style are not compared, so
/// slanted or rotated text is not grouped along its own baseline.
/// </remarks>
public class DxfTextGroupingStrategy___Y_FIRST_THEN_X_NO_SLOPES : IEventListener
{
    // Every chunk seen so far, in the order the events arrived.
    private readonly List<TextChunk> chunks = new List<TextChunk>();

    /// <summary>
    /// Records one chunk per RENDER_TEXT event: its text, baseline start point,
    /// baseline length and baseline angle in radians. Other events are ignored.
    /// </summary>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT)
        {
            return;
        }

        var renderInfo = (TextRenderInfo)data;
        string text = renderInfo.GetText();
        var baseline = renderInfo.GetBaseline();
        Vector start = baseline.GetStartPoint();
        Vector end = baseline.GetEndPoint();
        float angle = (float)Math.Atan2(end.Get(1) - start.Get(1), end.Get(0) - start.Get(0));
        float dist = end.Subtract(start).Length();

        var chunk = new TextChunk();
        chunk.Text = text;
        chunk.X = start.Get(0);
        chunk.Y = start.Get(1);
        chunk.Width = dist;
        chunk.Angle = angle;
        chunks.Add(chunk);
    }

    /// <summary>Declares that this listener only wants RENDER_TEXT events.</summary>
    public ICollection<EventType> GetSupportedEvents() => new HashSet<EventType> { EventType.RENDER_TEXT };

    /// <summary>
    /// Buckets the recorded chunks into lines and concatenates each line's text.
    /// </summary>
    /// <returns>
    /// One string per line, in the order the lines were first created while
    /// scanning the chunks from highest Y to lowest Y.
    /// </returns>
    public List<string> GetMergedLines()
    {
        const float lineThreshold = 2.0f;

        // Pass 1: a chunk joins the first existing line whose first chunk's Y is
        // within lineThreshold of its own Y; otherwise it opens a new line.
        var rows = new List<List<TextChunk>>();
        foreach (var chunk in chunks.OrderByDescending(c => c.Y).ThenBy(c => c.X))
        {
            List<TextChunk> row = null;
            foreach (var candidate in rows)
            {
                if (Math.Abs(candidate[0].Y - chunk.Y) < lineThreshold)
                {
                    row = candidate;
                    break;
                }
            }

            if (row == null)
            {
                row = new List<TextChunk>();
                rows.Add(row);
            }

            row.Add(chunk);
        }

        // Pass 2: within a line, order by X and insert one space wherever the gap
        // between the previous chunk's end (X + Width) and the next X exceeds 1.0.
        var merged = new List<string>();
        foreach (var row in rows)
        {
            var text = new StringBuilder();
            TextChunk previous = null;
            foreach (var current in row.OrderBy(c => c.X))
            {
                if (previous != null)
                {
                    float gap = current.X - (previous.X + previous.Width);
                    if (gap > 1.0f)
                    {
                        text.Append(" ");
                    }
                }

                text.Append(current.Text);
                previous = current;
            }

            merged.Add(text.ToString());
        }

        return merged;
    }

    /// <summary>Minimal record of one rendered text chunk.</summary>
    public class TextChunk
    {
        // Text of the chunk.
        public string Text;
        // X of the baseline start point.
        public float X;
        // Y of the baseline start point.
        public float Y;
        // Length of the baseline.
        public float Width;
        // Baseline angle in radians, from Math.Atan2.
        public float Angle;
        // Font name; not assigned by this listener.
        public string StringFontName;
    }
}
Thank you. Based on your code and requirements, here’s a revised smart text grouping strategy for engineering PDFs — focusing on:
• Slanted / inclined text alignment
• Baseline vector alignment
• Font/color/style consistency
• Smart proximity and angular grouping
________________________________________
✅ Problem in Original Code
• You were grouping only based on Y (horizontal lines), which fails for slanted or rotated text (common in engineering drawings).
• You weren't using the baseline unit vector direction or font style, which are critical for merging broken glyphs.
________________________________________
✅ Revised Strategy Goals
1. Group by:
o Page number
o Font Name & Size
o Font Color
o Text Rotation (Angle)
o Baseline Vector Unit Direction
2. Merge text fragments that are:
o Aligned in direction (similar angle ± 1°)
o Close along the baseline (projected distance < threshold)
o Same style
________________________________________
✅ Revised C# Strategy
csharp
CopyEdit
/// <summary>
/// Event listener that collects rendered text chunks and groups those that share
/// page, font, size, colour and baseline direction and that lie close together
/// along one baseline. Intended for drawings where text is stored as separate,
/// possibly slanted, glyphs.
/// </summary>
public class DxfSmartEngineeringTextGrouper : IEventListener
{
    // Font sizes closer than this are treated as equal; avoids exact float equality.
    private const float FontSizeTolerance = 0.01f;

    private readonly List<TextChunk> allChunks = new List<TextChunk>();

    /// <summary>
    /// Page number stamped on every chunk received from now on. Set it before
    /// processing each page so chunks from different pages are never merged.
    /// Left at its default (0), all chunks are treated as one page.
    /// </summary>
    public int CurrentPageNumber { get; set; }

    /// <summary>
    /// Records one chunk per RENDER_TEXT event; other events are ignored.
    /// </summary>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT) return;
        var info = (TextRenderInfo)data;
        var baseline = info.GetBaseline();
        var start = baseline.GetStartPoint();
        var end = baseline.GetEndPoint();
        var baselineVec = end.Subtract(start);
        var angle = (float)Math.Atan2(baselineVec.Get(1), baselineVec.Get(0));
        var width = baselineVec.Length();
        allChunks.Add(new TextChunk
        {
            PageNumber = CurrentPageNumber,
            Text = info.GetText(),
            X = start.Get(0),
            Y = start.Get(1),
            Width = width,
            Angle = angle,
            // Normalizing a zero-length baseline would divide by zero, so such
            // chunks fall back to the horizontal direction.
            BaselineDirection = width > 0f ? baselineVec.Normalize() : new Vector(1, 0, 0),
            // Use the font's declared name so different fonts do not share a key.
            FontName = info.GetFont()?.GetFontProgram()?.GetFontNames()?.GetFontName() ?? string.Empty,
            FontSize = info.GetFontSize(),
            Color = DescribeColor(info.GetFillColor())
        });
    }

    /// <summary>Declares that this listener only wants RENDER_TEXT events.</summary>
    public ICollection<EventType> GetSupportedEvents() => new HashSet<EventType> { EventType.RENDER_TEXT };

    /// <summary>
    /// Groups the recorded chunks. A chunk joins the first existing group that
    /// matches its page, font name, font size, colour and direction and that
    /// contains a chunk near it on the same baseline; otherwise it opens a new
    /// group. Each group's chunks are then ordered along the group's baseline.
    /// </summary>
    /// <param name="proximityThreshold">
    /// Largest allowed gap along the baseline, and largest allowed offset across
    /// it, in the units of the chunk coordinates.
    /// </param>
    /// <param name="angleTolerance">Largest allowed baseline angle difference, in radians.</param>
    /// <returns>The groups, in order of creation.</returns>
    public List<TextGroup> GroupTextChunks(float proximityThreshold = 2.0f, float angleTolerance = 0.02f)
    {
        var groups = new List<TextGroup>();
        foreach (var chunk in allChunks)
        {
            var matchedGroup = groups.FirstOrDefault(g =>
                g.PageNumber == chunk.PageNumber &&
                AngularDifference(g.Angle, chunk.Angle) < angleTolerance &&
                g.BaselineDirection.Dot(chunk.BaselineDirection) > 0.99 &&
                g.FontName == chunk.FontName &&
                Math.Abs(g.FontSize - chunk.FontSize) < FontSizeTolerance &&
                g.Color == chunk.Color &&
                g.IsNearby(chunk, proximityThreshold));
            if (matchedGroup == null)
            {
                matchedGroup = new TextGroup
                {
                    PageNumber = chunk.PageNumber,
                    Angle = chunk.Angle,
                    FontName = chunk.FontName,
                    FontSize = chunk.FontSize,
                    Color = chunk.Color,
                    BaselineDirection = chunk.BaselineDirection
                };
                groups.Add(matchedGroup);
            }
            matchedGroup.Chunks.Add(chunk);
        }
        // Sort each group by position along baseline. The projections are measured
        // from Chunks[0], so they are computed into a separate list first: an
        // in-place sort would move the reference chunk while comparing.
        foreach (var group in groups)
        {
            var ordered = group.Chunks.OrderBy(c => c.ProjectedPosition(group)).ToList();
            group.Chunks.Clear();
            group.Chunks.AddRange(ordered);
        }
        return groups;
    }

    // Smallest absolute difference between two angles in radians, so that
    // angles just either side of +/-PI compare as close.
    private static float AngularDifference(float a, float b)
    {
        double diff = Math.Abs(a - b) % (2 * Math.PI);
        return (float)(diff > Math.PI ? 2 * Math.PI - diff : diff);
    }

    // Builds a grouping key from the colour's runtime type name and component
    // values, so different colours of the same type get different keys.
    // A missing colour keeps the previous "Black" fallback.
    private static string DescribeColor(DETA7.Kernel.Colors.Color color)
    {
        float[] components = color?.GetColorValue();
        if (components == null) return "Black";
        var parts = components.Select(v => v.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture));
        return color.GetType().Name + "(" + string.Join(" ", parts) + ")";
    }

    /// <summary>One rendered text chunk with the attributes used for grouping.</summary>
    public class TextChunk
    {
        public string Text;
        // X, Y: baseline start; Width: baseline length; Angle: radians from Math.Atan2.
        public float X, Y, Width, Angle, FontSize;
        public string FontName, Color;
        public Vector BaselineDirection;
        // Page the chunk was recorded on (see CurrentPageNumber).
        public int PageNumber;
        public Vector Position => new Vector(X, Y, 0);

        /// <summary>
        /// Signed distance of this chunk's start from the start of the group's
        /// first chunk, measured along the group's baseline direction.
        /// The group must contain at least one chunk.
        /// </summary>
        public float ProjectedPosition(TextGroup g) =>
            Position.Subtract(g.Chunks[0].Position).Dot(g.BaselineDirection);
    }

    /// <summary>A set of chunks judged to form one piece of text.</summary>
    public class TextGroup
    {
        public List<TextChunk> Chunks = new List<TextChunk>();
        public string FontName, Color;
        public float FontSize, Angle;
        public Vector BaselineDirection;
        public int PageNumber;

        /// <summary>
        /// True when <paramref name="c"/> lies on the same baseline as some chunk
        /// already in the group (offset across the baseline below
        /// <paramref name="threshold"/>) and the empty gap between the two chunk
        /// extents along the baseline is below <paramref name="threshold"/>.
        /// Chunk widths are taken into account, so a chunk starting where a wide
        /// neighbour ends counts as near. Returns false for an empty group.
        /// </summary>
        public bool IsNearby(TextChunk c, float threshold)
        {
            return Chunks.Any(existing =>
            {
                Vector delta = c.Position.Subtract(existing.Position);
                float along = delta.Dot(BaselineDirection);
                // Squared offset across the baseline (Pythagoras); clamped because
                // rounding can make it slightly negative.
                float acrossSquared = Math.Max(0f, delta.Dot(delta) - along * along);
                if (acrossSquared >= threshold * threshold) return false;
                float gapIfAfter = along - existing.Width; // c starts after existing ends
                float gapIfBefore = -along - c.Width;      // c ends before existing starts
                return Math.Max(gapIfAfter, gapIfBefore) < threshold;
            });
        }

        /// <summary>
        /// Concatenates the chunk texts in baseline order. A space is inserted only
        /// where the gap between one chunk's end and the next chunk's start exceeds
        /// <paramref name="spaceGapThreshold"/>, so glyphs stored separately are
        /// rejoined into words instead of being spaced apart.
        /// </summary>
        /// <param name="spaceGapThreshold">Gap above which a space is inserted, in chunk coordinate units.</param>
        /// <returns>The merged text, or an empty string for an empty group.</returns>
        public string GetMergedText(float spaceGapThreshold = 1.0f)
        {
            if (Chunks.Count == 0) return string.Empty;
            var ordered = Chunks.OrderBy(c => c.ProjectedPosition(this)).ToList();
            var merged = new StringBuilder();
            for (int i = 0; i < ordered.Count; i++)
            {
                if (i > 0)
                {
                    TextChunk previous = ordered[i - 1];
                    float gap = ordered[i].ProjectedPosition(this)
                        - (previous.ProjectedPosition(this) + previous.Width);
                    if (gap > spaceGapThreshold) merged.Append(' ');
                }
                merged.Append(ordered[i].Text);
            }
            return merged.ToString();
        }

        /// <summary>
        /// Formats the group as one CSV row: page, font name, font size, colour,
        /// angle (radians), merged text. Text fields are quoted with embedded
        /// quotes doubled, and numbers use the invariant culture so the decimal
        /// separator can never be a comma.
        /// </summary>
        public string ToDatabaseRow(int pageNumber)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            return string.Join(",",
                pageNumber.ToString(invariant),
                CsvQuote(FontName),
                FontSize.ToString(invariant),
                CsvQuote(Color),
                Angle.ToString(invariant),
                CsvQuote(GetMergedText()));
        }

        private static string CsvQuote(string value) =>
            "\"" + (value ?? string.Empty).Replace("\"", "\"\"") + "\"";
    }
}
________________________________________
✅ Output Table Format for DB Insert
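| Page | Font Name | Size | Color | Angle (rad) | Merged Text |
|------|-----------|------|-------|-------------|-------------|
| 1 | Arial | 10.5 | Black | 0.03 | IS 800:2007 Section 8.2 |
| 1 | Arial | 10.5 | Black | 1.57 | Column A-A Elevation |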
________________________________________
✅ Final Notes
• Engineering documents often break glyphs into individual characters. Grouping depends on slope and style.
• The strategy mimics the text unbreaking techniques seen in Bluebeam or Tekla’s import layers.
• This strategy assumes linear placement along a single direction vector.
• It is ready to be exported into DXF or inserted into SQL.
If you want, I can also write:
• SQL CREATE TABLE scripts for this output
• DXF text inserter that uses this group list
Would you like that next?
• Query successful
You've hit on a critical point for processing engineering documents: text is often broken into individual characters or small segments, and its logical grouping depends on more than just proximity along a horizontal line. Engineering drawings frequently feature text aligned to sloped lines, or fragmented for precise placement.
The SimpleTextExtractionStrategy and DxfTextGroupingStrategy___Y_FIRST_THEN_X_NO_SLOPES are basic. To achieve "smarter" text grouping for engineering documents and prepare it for database insertion, we need to incorporate several new heuristics and a more sophisticated TextChunk data structure.
Here's a revised DxfTextGroupingStrategy (renamed for clarity) and a new TextGroup class, along with an example of how the output could be formatted for database insertion.
Core Ideas for Smarter Grouping
1. Enriched TextChunk Data: The TextChunk must store all relevant rendering information (font, color, rotation, baseline vector) directly from the TextRenderInfo and its associated GraphicsState.
2. Multidimensional Sorting and Grouping:
o Primary Sort: Page Number.
o Secondary Sort: Baseline angle (or a quantized angle).
o Tertiary Sort: Y-coordinate of the baseline start point.
o Quaternary Sort: X-coordinate of the baseline start point.
3. Baseline Unit Vector Proximity: Instead of just Y-proximity, check for proximity along the line defined by the baseline's unit vector. This is crucial for sloped text.
4. Feature Matching: For text chunks to belong to the same logical group, they must have very similar (or identical) font name, font size, color, rotation angle, and potentially TextRenderMode (thickness/style).
5. Contiguity/Spacing Threshold: Measure the gap along the baseline's direction between the end of one chunk and the start of the next. If this gap is within a reasonable "word space" threshold, merge. If larger, but still on the same "line," insert a space. If too large or different baseline, start a new group.
6. Output Format: A List<TextGroup> where each TextGroup represents a complete, logically grouped text block, containing all the required attributes for database insertion.
Revised C# Code
First, a refined TextChunk to capture all necessary properties:
C#
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DETA7.Commons.Utils;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf.Canvas.Parser;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
using DETA7.Kernel.Pdf.Canvas.Parser.Listener; // Assuming this namespace exists
using DETA7.Kernel.Font; // For PdfFont
namespace saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
{
// --- 1. Enriched TextChunk Data Structure ---
/// <summary>
/// One text rendering operation, with the position, direction and style
/// attributes needed to decide whether neighbouring chunks belong together.
/// </summary>
public class TextChunk
{
    public int PageNumber { get; set; } // Page the chunk belongs to (0 when not supplied)
    public string Text { get; set; }
    public Vector StartPoint { get; set; } // Baseline start point as returned by GetBaseline()
    public Vector EndPoint { get; set; } // Baseline end point as returned by GetBaseline()
    public float AngleDegrees { get; set; } // Baseline angle in degrees, normalized to [0, 360)
    public float FontHeight { get; set; } // Distance from the baseline start to the ascent line start
    public float FontSize { get; set; } // Font size as reported by GetFontSize()
    public string FontName { get; set; }
    public Color FillColor { get; set; }
    public int TextRenderMode { get; set; } // For thickness/style
    public float WordSpacing { get; set; }
    public float CharSpacing { get; set; }
    public float HorizontalScaling { get; set; }
    public float CurrentPageWidth { get; set; } // To facilitate relative positioning/offsets
    public float CurrentPageHeight { get; set; }
    // Additional properties for robust grouping
    public Vector BaselineUnitVector { get; set; } // Unit vector along the baseline
    public float GlyphWidth { get; set; } // Width of the text chunk as rendered
    public float AscentValue { get; set; } // First value returned by GetAscentDescent()
    public float DescentValue { get; set; } // Second value returned by GetAscentDescent()
    public float SingleSpaceWidth { get; set; } // Space width reported by the render info

    /// <summary>
    /// Extracts all grouping attributes from a text render event.
    /// </summary>
    /// <param name="renderInfo">Render event to read; must not be null.</param>
    /// <param name="currentPageWidth">Width of the page being processed.</param>
    /// <param name="currentPageHeight">Height of the page being processed.</param>
    /// <param name="pageNumber">Page number to record; optional for existing callers.</param>
    /// <exception cref="ArgumentNullException"><paramref name="renderInfo"/> is null.</exception>
    public TextChunk(TextRenderInfo renderInfo, float currentPageWidth, float currentPageHeight, int pageNumber = 0)
    {
        if (renderInfo == null) throw new ArgumentNullException(nameof(renderInfo));

        PageNumber = pageNumber;
        Text = renderInfo.GetText();
        LineSegment baseline = renderInfo.GetBaseline();
        StartPoint = baseline.GetStartPoint();
        EndPoint = baseline.GetEndPoint();

        // Calculate Angle in degrees
        float dx = EndPoint.Get(Vector.I1) - StartPoint.Get(Vector.I1);
        float dy = EndPoint.Get(Vector.I2) - StartPoint.Get(Vector.I2);
        AngleDegrees = (float)(Math.Atan2(dy, dx) * (180.0 / Math.PI));
        AngleDegrees = (AngleDegrees + 360) % 360; // Normalize angle to 0-360

        // The ascent line runs along the text, so its length is the text width,
        // not its height. The height is how far that line sits from the baseline.
        FontHeight = renderInfo.GetAscentLine().GetStartPoint().Subtract(StartPoint).Length();
        FontSize = renderInfo.GetFontSize();
        FontName = renderInfo.GetFont()?.GetFontProgram()?.GetFontNames()?.GetFontName() ?? string.Empty;
        FillColor = renderInfo.GetFillColor();
        TextRenderMode = renderInfo.GetTextRenderMode();
        WordSpacing = renderInfo.GetWordSpacing();
        CharSpacing = renderInfo.GetCharSpacing();
        HorizontalScaling = renderInfo.GetHorizontalScaling();
        SingleSpaceWidth = renderInfo.GetSingleSpaceWidth();

        // Rendered width of the string in user space.
        GlyphWidth = renderInfo.ConvertWidthFromTextSpaceToUserSpace(renderInfo.GetUnscaledWidth());
        float[] ascentDescent = renderInfo.GetAscentDescent();
        AscentValue = ascentDescent[0];
        DescentValue = ascentDescent[1];

        // Normalizing a zero-length baseline would divide by zero, so such
        // chunks fall back to the horizontal direction.
        Vector baselineVec = EndPoint.Subtract(StartPoint);
        BaselineUnitVector = baselineVec.Length() > 0f ? baselineVec.Normalize() : new Vector(1, 0, 0);

        CurrentPageWidth = currentPageWidth;
        CurrentPageHeight = currentPageHeight;
    }

    /// <summary>
    /// Tells whether two chunks match in font size, baseline angle, font name,
    /// fill colour and text render mode closely enough to be grouping candidates.
    /// </summary>
    /// <param name="other">Chunk to compare with; null never matches.</param>
    /// <param name="fontTolerance">Largest allowed font size difference.</param>
    public bool IsSimilarStyle(TextChunk other, float fontTolerance = 0.5f)
    {
        if (other == null) return false;

        // Shortest way round the circle, so that 359.9 and 0.1 degrees compare as close.
        float angleDelta = Math.Abs(this.AngleDegrees - other.AngleDegrees) % 360f;
        if (angleDelta > 180f) angleDelta = 360f - angleDelta;

        return Math.Abs(this.FontSize - other.FontSize) < fontTolerance &&
               angleDelta < 0.5f && // Angle tolerance in degrees
               string.Equals(this.FontName, other.FontName, StringComparison.OrdinalIgnoreCase) &&
               object.Equals(this.FillColor, other.FillColor) && // Null-safe strict colour match
               this.TextRenderMode == other.TextRenderMode; // Stroke/Fill mode
    }

    /// <summary>
    /// Signed distance from this chunk's baseline start to <paramref name="point"/>,
    /// measured along this chunk's baseline direction.
    /// </summary>
    public float GetDistanceAlongBaseline(Vector point)
    {
        Vector relativeVector = point.Subtract(this.StartPoint);
        return relativeVector.Dot(this.BaselineUnitVector);
    }

    /// <summary>
    /// Width of one space for this chunk: the value reported by the render info
    /// when it is positive, otherwise an estimate of 30% of the font height.
    /// </summary>
    public float GetSingleSpaceWidth()
    {
        return SingleSpaceWidth > 0f ? SingleSpaceWidth : FontHeight * 0.3f;
    }
}
// --- 2. TextGroup Data Structure (for Database Insertion) ---
/// <summary>
/// A merged run of text chunks with the attributes exported for database
/// insertion (see <see cref="ToCsvString"/> and <see cref="GetCsvHeader"/>).
/// </summary>
public class TextGroup
{
    public int PageNumber { get; set; }
    public string MergedText { get; set; }
    public float InsertionX { get; set; } // Baseline start X plus page offset
    public float InsertionY { get; set; } // Baseline start Y plus page offset
    public float AngleDegrees { get; set; }
    public float FontHeight { get; set; }
    public string FontName { get; set; }
    public string FontColorHex { get; set; } // Stored as "#RRGGBB"
    public int TextRenderMode { get; set; } // Corresponds to thickness/style
    public string BaselineVectorDirection { get; set; } // e.g., "1.000, 0.000" for horizontal
    public float Width { get; set; } // Chunk widths plus the gaps between them
    public float Height { get; set; } // Largest ascent + |descent| among the chunks

    /// <summary>
    /// Starts a group from its first chunk.
    /// </summary>
    /// <param name="initialChunk">First chunk of the group; must not be null.</param>
    /// <param name="pageOffset">
    /// Offset added to both the X and the Y of the insertion point.
    /// NOTE(review): the same offset is applied on both axes - confirm this is
    /// intended for the page layout used in the DXF output.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="initialChunk"/> is null.</exception>
    public TextGroup(TextChunk initialChunk, int pageOffset)
    {
        if (initialChunk == null) throw new ArgumentNullException(nameof(initialChunk));

        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        PageNumber = initialChunk.PageNumber;
        MergedText = initialChunk.Text;
        InsertionX = initialChunk.StartPoint.Get(Vector.I1) + pageOffset;
        InsertionY = initialChunk.StartPoint.Get(Vector.I2) + pageOffset;
        AngleDegrees = initialChunk.AngleDegrees;
        FontHeight = initialChunk.FontHeight;
        FontName = initialChunk.FontName;
        FontColorHex = GetHexColor(initialChunk.FillColor);
        TextRenderMode = initialChunk.TextRenderMode;
        // Invariant culture keeps "." as the decimal separator on every machine.
        BaselineVectorDirection =
            initialChunk.BaselineUnitVector.Get(Vector.I1).ToString("F3", invariant) + ", " +
            initialChunk.BaselineUnitVector.Get(Vector.I2).ToString("F3", invariant);
        Width = initialChunk.GlyphWidth;
        Height = initialChunk.AscentValue + Math.Abs(initialChunk.DescentValue);
    }

    /// <summary>
    /// Appends a chunk that follows the current end of the group.
    /// </summary>
    /// <param name="chunk">Chunk to append.</param>
    /// <param name="actualGap">
    /// Gap along the baseline between the group's end and the chunk's start.
    /// A gap above 0.01 inserts a space; callers that consider the chunk
    /// contiguous should pass 0.
    /// </param>
    public void AppendChunk(TextChunk chunk, float actualGap)
    {
        if (actualGap > 0.01f)
        {
            MergedText += " ";
        }
        MergedText += chunk.Text;
        // The combined width spans the gap as well as the glyphs; a negative gap
        // (overlap) is not subtracted.
        Width += Math.Max(actualGap, 0f) + chunk.GlyphWidth;
        // Keep the tallest extent seen so far.
        Height = Math.Max(Height, chunk.AscentValue + Math.Abs(chunk.DescentValue));
    }

    // Converts a colour to "#RRGGBB" based on its number of components:
    // 1 is treated as gray, 3 as RGB, 4 as CMYK. A null colour, or any other
    // component count, falls back to white.
    // NOTE(review): the component count does not identify the colour space
    // (e.g. a one-component tint) - confirm which spaces occur in the input.
    private static string GetHexColor(Color color)
    {
        float[] v = color?.GetColorValue();
        if (v == null) return "#FFFFFF";

        float r, g, b;
        switch (v.Length)
        {
            case 1:
                r = g = b = v[0];
                break;
            case 3:
                r = v[0];
                g = v[1];
                b = v[2];
                break;
            case 4:
                r = (1f - v[0]) * (1f - v[3]);
                g = (1f - v[1]) * (1f - v[3]);
                b = (1f - v[2]) * (1f - v[3]);
                break;
            default:
                return "#FFFFFF";
        }
        return $"#{ToByte(r):X2}{ToByte(g):X2}{ToByte(b):X2}";
    }

    // Maps a 0..1 component to 0..255, clamping out-of-range input and rounding
    // instead of truncating.
    private static int ToByte(float component)
    {
        float clamped = Math.Min(1f, Math.Max(0f, component));
        return (int)Math.Round(clamped * 255f);
    }

    private static string CsvQuote(string value) =>
        "\"" + (value ?? string.Empty).Replace("\"", "\"\"") + "\"";

    /// <summary>
    /// Formats the group as one CSV row in the column order of
    /// <see cref="GetCsvHeader"/>. Text fields are quoted with embedded quotes
    /// doubled; numbers use the invariant culture so the decimal separator can
    /// never be a comma.
    /// </summary>
    public string ToCsvString()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        return string.Join(",",
            PageNumber.ToString(invariant),
            CsvQuote(MergedText),
            InsertionX.ToString("F2", invariant),
            InsertionY.ToString("F2", invariant),
            AngleDegrees.ToString("F2", invariant),
            FontHeight.ToString("F2", invariant),
            CsvQuote(FontName),
            CsvQuote(FontColorHex),
            TextRenderMode.ToString(invariant),
            CsvQuote(BaselineVectorDirection),
            Width.ToString("F2", invariant),
            Height.ToString("F2", invariant));
    }

    /// <summary>Column names matching <see cref="ToCsvString"/>.</summary>
    public static string GetCsvHeader()
    {
        return "PageNumber,MergedText,InsertionX,InsertionY,AngleDegrees,FontHeight,FontName,FontColorHex,TextRenderMode,BaselineVectorDirection,Width,Height";
    }
}
// --- 3. Revised Text Grouping Strategy ---
/// <summary>
/// Event listener that collects text chunks from all pages and merges them into
/// <see cref="TextGroup"/>s along each chunk's own baseline, so slanted and
/// rotated text is grouped the same way as horizontal text.
/// </summary>
public class DxfTextGroupingStrategy : IEventListener
{
    private readonly List<TextChunk> _allTextChunks = new List<TextChunk>(); // Collect all raw chunks
    public readonly List<TextGroup> GroupedTextOutput = new List<TextGroup>(); // Final grouped texts for DB/DXF
    private int _currentPageNumber; // Current page number being processed
    private float _currentPageWidth;
    private float _currentPageHeight;

    // A group under construction together with the chunk most recently added to
    // it; the next chunk is measured against that chunk.
    private sealed class OpenGroup
    {
        public TextGroup Group;
        public TextChunk Last;
    }

    /// <summary>
    /// Records the page number and size applied to chunks received from now on.
    /// Collected chunks are kept; grouping runs once over all pages.
    /// </summary>
    public void SetPageInfo(int pageNumber, float pageWidth, float pageHeight)
    {
        _currentPageNumber = pageNumber;
        _currentPageWidth = pageWidth;
        _currentPageHeight = pageHeight;
    }

    /// <summary>
    /// Records one chunk per RENDER_TEXT event, stamped with the current page
    /// number; other events are ignored.
    /// </summary>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT)
        {
            return;
        }
        var renderInfo = (TextRenderInfo)data;
        // NOTE(review): assumes GetBaseline() already returns CTM-transformed
        // points; if not, transform them before building the chunk.
        _allTextChunks.Add(new TextChunk(renderInfo, _currentPageWidth, _currentPageHeight)
        {
            PageNumber = _currentPageNumber
        });
    }

    /// <summary>Declares that this listener only wants RENDER_TEXT events.</summary>
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> { EventType.RENDER_TEXT };
    }

    /// <summary>
    /// Rebuilds <see cref="GroupedTextOutput"/> from all collected chunks.
    /// </summary>
    /// <remarks>
    /// Chunks are visited page by page, ordered by the position of their start
    /// point along their own baseline direction. Each chunk is compared with the
    /// last chunk of every group already open on that page and joins the one
    /// with the smallest gap, provided that (1) the styles match according to
    /// <c>IsSimilarStyle</c>, (2) its start lies within half a font height of
    /// that chunk's baseline line, and (3) the gap from that chunk's end is more
    /// than minus one space width and less than 1.5 space widths. Otherwise it
    /// opens a new group. Groups are output in order of creation.
    /// </remarks>
    /// <param name="pageOffsetFactor">
    /// Multiplied by (page number - 1) and passed to the <see cref="TextGroup"/>
    /// constructor as the page offset.
    /// </param>
    public void ProcessAndGroupAllTexts(float pageOffsetFactor = 30000f)
    {
        GroupedTextOutput.Clear();
        if (_allTextChunks.Count == 0)
        {
            return;
        }

        const float perpendicularToleranceFactor = 0.5f; // fraction of font height
        const float contiguityFactor = 0.5f;             // fraction of space width: no space inserted below this
        const float maxWordGapFactor = 1.5f;             // fraction of space width: new group above this

        var ordered = _allTextChunks
            .OrderBy(c => c.PageNumber)
            .ThenBy(c => c.StartPoint.Dot(c.BaselineUnitVector))
            .ToList();

        var open = new List<OpenGroup>();
        int openPage = ordered[0].PageNumber;

        foreach (TextChunk chunk in ordered)
        {
            if (chunk.PageNumber != openPage)
            {
                // Groups never continue across pages.
                open.Clear();
                openPage = chunk.PageNumber;
            }

            OpenGroup best = null;
            float bestGap = 0f;
            float bestSpace = 0f;
            foreach (OpenGroup candidate in open)
            {
                TextChunk last = candidate.Last;
                if (!chunk.IsSimilarStyle(last))
                {
                    continue;
                }

                Vector relativeStart = chunk.StartPoint.Subtract(last.StartPoint);
                float along = relativeStart.Dot(last.BaselineUnitVector);
                float across = relativeStart.Cross(last.BaselineUnitVector).Length();
                float space = GetSingleSpaceWidth(last);
                float gap = along - last.GlyphWidth;

                bool onSameLine = across < chunk.FontHeight * perpendicularToleranceFactor;
                // The lower bound rejects chunks lying well behind the previous one;
                // a small overlap is still accepted.
                bool withinReach = gap > -space && gap < space * maxWordGapFactor;
                if (onSameLine && withinReach && (best == null || Math.Abs(gap) < Math.Abs(bestGap)))
                {
                    best = candidate;
                    bestGap = gap;
                    bestSpace = space;
                }
            }

            if (best != null)
            {
                // Pass 0 for contiguous chunks so that no space is inserted.
                bool isContiguous = bestGap < bestSpace * contiguityFactor;
                best.Group.AppendChunk(chunk, isContiguous ? 0f : bestGap);
                best.Last = chunk;
            }
            else
            {
                float pageOffset = pageOffsetFactor * (chunk.PageNumber - 1);
                var group = new TextGroup(chunk, (int)pageOffset);
                GroupedTextOutput.Add(group);
                open.Add(new OpenGroup { Group = group, Last = chunk });
            }
        }
    }

    // Estimates the width of one space for a chunk as 30% of its font height.
    // Placeholder heuristic; font metrics would give a more accurate value.
    private float GetSingleSpaceWidth(TextChunk chunk)
    {
        return chunk.FontHeight * 0.3f;
    }
}
// --- Original GraphicsAndText_ExtractionListener from previous example, adapted to use DxfTextGroupingStrategy ---
// (This part would replace the previous GraphicsAndText_ExtractionListener logic)
/*
// Refactored GraphicsAndText_ExtractionListener to leverage the smarter grouping strategy
public class GraphicsAndText_ExtractionListener : IEventListener
{
private readonly List<string> graphicsData = new List<string>();
private readonly List<string> ListOfStringAsTextDataOnlys = new List<string>(); // Raw text strings for debugging
private readonly List<string> dxfGraphicsData = new List<string>(); // For graphics DXF entities
// This is the new grouping strategy instance
private readonly DxfTextGroupingStrategy _textGroupingStrategy = new DxfTextGroupingStrategy();
private int currentPageNumber;
private float currentPageWidth;
private float currentPageHeight;
// private Matrix currentTransformationMatrix; // Not directly managed by listener, but passed to TransformPoint
public GraphicsAndText_ExtractionListener()
{
// The text grouping strategy will listen to RENDER_TEXT events.
// This listener will forward them.
}
public void SetPageInfo(int pageNumber, float pageWidth, float pageHeight)
{
currentPageNumber = pageNumber;
currentPageWidth = pageWidth;
currentPageHeight = pageHeight;
_textGroupingStrategy.SetPageInfo(pageNumber, pageWidth, pageHeight);
}
public void EventOccurred(IEventData data, EventType type)
{
if (type == EventType.RENDER_PATH)
{
PathRenderInfo renderInfo = (PathRenderInfo)data;
graphicsData.Add(GetPathDetails(renderInfo)); // Raw text log
AddToDxfGraphicsData(renderInfo, currentPageNumber); // Generate DXF for graphics
}
else if (type == EventType.RENDER_TEXT)
{
TextRenderInfo textRenderInfo = (TextRenderInfo)data;
ListOfStringAsTextDataOnlys.Add(GetTextDetails(textRenderInfo)); // Raw text log
_textGroupingStrategy.EventOccurred(data, type); // Forward to grouping strategy
}
// Other event types BEGIN_TEXT, END_TEXT are typically handled by the PdfCanvasProcessor itself
// or by custom operators within PdfCanvasProcessor.PopulateOperators.
// We only need to listen to RENDER events for extraction.
}
public ICollection<EventType> GetSupportedEvents()
{
// This listener supports both path and text rendering
return new HashSet<EventType>
{
EventType.RENDER_PATH,
EventType.RENDER_TEXT
};
}
// Call this method AFTER processing all pages, to finalize text grouping and DXF generation
public List<TextGroup> GetFinalGroupedTextsForDatabase()
{
_textGroupingStrategy.ProcessAndGroupAllTexts();
return _textGroupingStrategy.GroupedTextOutput;
}
// Call this to get the combined DXF data
public List<string> GetFinalDxfData(float pageOffsetFactor = 30000f)
{
List<string> finalDxfEntities = new List<string>(dxfGraphicsData); // Start with graphics
_textGroupingStrategy.ProcessAndGroupAllTexts(pageOffsetFactor); // Ensure all texts are grouped
// Add grouped text DXF entities
foreach (var textGroup in _textGroupingStrategy.GroupedTextOutput)
{
// This call uses the TextGroup to generate the DXF TEXT entity
finalDxfEntities.Add(GenerateDxfTextEntity(textGroup));
}
return finalDxfEntities;
}
// --- DXF Generation for Grouped Text (Moved from AddTextContentsToToDxfData) ---
private string GenerateDxfTextEntity(TextGroup textGroup)
{
// This method takes a fully grouped TextGroup and formats it for DXF.
// All coordinates (InsertionX, InsertionY) should already include page offsets.
// Group Codes: 0=TEXT, 8=Layer, 10,20,30=Insertion Point, 40=Text Height, 50=Rotation Angle, 1=Text String
// 62=ACI Color Number
return $"0\nTEXT\n8\n{textGroup.FontName}_{textGroup.FontSize:F0}_{textGroup.AngleDegrees:F0}_{textGroup.FontColorHex}\n10\n{textGroup.InsertionX:F2}\n20\n{textGroup.InsertionY:F2}\n30\n0.0\n40\n{textGroup.FontHeight:F2}\n50\n{textGroup.AngleDegrees:F2}\n62\n{GetClosestAciColorFromHex(textGroup.FontColorHex)}\n1\n{textGroup.MergedText}";
}
/// <summary>
/// Placeholder for mapping a hex colour string to an AutoCAD Colour Index (ACI).
/// The argument is currently ignored.
/// </summary>
/// <param name="hexColor">Colour as a hex string (not inspected yet).</param>
/// <returns>Always 7.</returns>
private int GetClosestAciColorFromHex(string hexColor)
{
    // Not implemented: a real version would turn the hex string back into RGB and look
    // up the nearest ACI entry, or the original colour object would be passed around instead.
    const int placeholderAci = 7; // described in the original as "default to white"
    return placeholderAci;
}
// --- Original Helper Methods (GetPathDetails, GetTextDetails, TransformPoint, etc.) ---
// Ensure these methods are still accessible or copied/adapted from your provided files.
// Every method below is a placeholder: the real body is elided and only a dummy value is returned.
// Placeholder: returns an empty string instead of a path description.
private string GetPathDetails(PathRenderInfo renderInfo) { /* ... your existing code ... */ return ""; }
// Placeholder: returns an empty string instead of a text description.
private string GetTextDetails(TextRenderInfo FoundTextRenderInfo) { /* ... your existing code ... */ return ""; }
// Placeholder: does nothing.
private void AddToDxfGraphicsData(PathRenderInfo renderInfo, int pageNumber) { /* ... your existing code ... */ }
// Placeholder: returns a fresh two-element array of zeros rather than a transformed point.
private float[] TransformPoint(Vector point, Matrix matrix) { /* ... your existing code ... */ return new float[2]; }
// Placeholder overload for Point input: same zero-filled two-element result.
private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix) { /* ... your existing code ... */ return new float[2]; }
// Placeholder: always returns 7 regardless of the RGB input.
private int GetClosestAciColor(int r, int g, int b) { /* ... your existing code ... */ return 7; }
}
// --- Revised PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations ---
// (This class orchestrates the extraction process)
/// <summary>
/// Orchestrates the extraction: walks every page of a PDF through the extraction listener,
/// then writes a grouped-text CSV, a DXF file and the raw graphics log next to the input file.
/// </summary>
public class PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations
{
    /// <summary>
    /// Processes the given PDF and writes three sibling outputs whose names are the input
    /// path plus a fixed suffix (grouped texts CSV, DXF, raw graphics log).
    /// </summary>
    /// <param name="selected_pdffilesnames">Path of the PDF file to process.</param>
    /// <remarks>
    /// The document is now closed in a <c>finally</c> block. Previously an exception while
    /// parsing a page or writing an output skipped the close call and left the PDF open.
    /// </remarks>
    public static void ExtractAndGenerateDxf(string selected_pdffilesnames)
    {
        string inputFilePath = selected_pdffilesnames;
        string outputGraphicsLogPath = inputFilePath + "___dumping_pagewises_graphics_data.txt";
        string dxfFilePath = inputFilePath + "___pagewises_graphics_data.dxf";
        string groupedTextCsvPath = inputFilePath + "___grouped_texts.csv"; // output for grouped texts

        PdfReader reader = new PdfReader(inputFilePath);
        PdfDocument pdfDoc = new PdfDocument(reader);
        try
        {
            GraphicsAndText_ExtractionListener listener = new GraphicsAndText_ExtractionListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);

            for (int i = 1; i <= pdfDoc.GetNumberOfPages(); i++)
            {
                PdfPage page = pdfDoc.GetPage(i);
                float pageWidth = page.GetPageSize().GetWidth();
                float pageHeight = page.GetPageSize().GetHeight();
                // The listener must know the page before its content is parsed.
                listener.SetPageInfo(i, pageWidth, pageHeight);
                processor.ProcessPageContent(page);
            }

            // After all pages are processed, get the final grouped texts.
            List<TextGroup> finalGroupedTexts = listener.GetFinalGroupedTextsForDatabase();

            // Write grouped texts to CSV.
            using (StreamWriter writer = new StreamWriter(groupedTextCsvPath, false, Encoding.UTF8))
            {
                writer.WriteLine(TextGroup.GetCsvHeader());
                foreach (var group in finalGroupedTexts)
                {
                    writer.WriteLine(group.ToCsvString());
                }
            }
            Console.WriteLine($"Grouped texts extracted to: {groupedTextCsvPath}");

            // NOTE(review): this triggers the grouping pass a second time (the CSV step above
            // already ran it) - confirm the strategy tolerates repeated runs.
            List<string> finalDxfData = listener.GetFinalDxfData();

            // Minimal DXF skeleton around the entities; the HEADER/TABLES/BLOCKS/OBJECTS sections are empty.
            List<string> dxfContent = new List<string>();
            dxfContent.Add("0\nSECTION\n2\nHEADER\n0\nENDSEC");
            dxfContent.Add("0\nSECTION\n2\nTABLES\n0\nENDSEC");
            dxfContent.Add("0\nSECTION\n2\nBLOCKS\n0\nENDSEC");
            dxfContent.Add("0\nSECTION\n2\nENTITIES");
            dxfContent.AddRange(finalDxfData); // graphics and grouped text entities
            dxfContent.Add("0\nENDSEC");
            dxfContent.Add("0\nSECTION\n2\nOBJECTS\n0\nENDSEC");
            dxfContent.Add("0\nEOF");
            System.IO.File.WriteAllLines(dxfFilePath, dxfContent);
            Console.WriteLine($"DXF generation completed successfully to: {dxfFilePath}");

            listener.WriteGraphicsData(outputGraphicsLogPath); // raw graphics/text log
        }
        finally
        {
            // Always release the PDF, including when extraction or output writing fails.
            pdfDoc.Close();
        }

        Console.WriteLine("Process completed.");
        // System.Windows.Forms.MessageBox.Show("Graphics extraction and DXF generation completed successfully.");
    }
}
*/
}
Thank you. Based on your code and requirements, here’s a revised smart text grouping strategy for engineering PDFs — focusing on:
• Slanted / inclined text alignment
• Baseline vector alignment
• Font/color/style consistency
• Smart proximity and angular grouping
________________________________________
✅ Problem in Original Code
• You were grouping only based on Y (horizontal lines), which fails for slanted or rotated text (common in engineering drawings).
• You weren't using the baseline unit vector direction or font style, which are critical for merging broken glyphs.
________________________________________
✅ Revised Strategy Goals
1. Group by:
o Page number
o Font Name & Size
o Font Color
o Text Rotation (Angle)
o Baseline Vector Unit Direction
2. Merge text fragments that are:
o Aligned in direction (similar angle ± 1°)
o Close along the baseline (projected distance < threshold)
o Same style
________________________________________
✅ Revised C# Strategy
csharp
CopyEdit
/// <summary>
/// Event listener that records every text render event as a <see cref="TextChunk"/> and can
/// afterwards merge chunks that share style and direction and sit next to each other on the
/// same baseline, including slanted or rotated text.
/// </summary>
public class DxfSmartEngineeringTextGrouper : IEventListener
{
    private readonly List<TextChunk> allChunks = new List<TextChunk>();

    /// <summary>Captures one chunk per RENDER_TEXT event; all other events are ignored.</summary>
    /// <param name="data">Event payload; cast to <see cref="TextRenderInfo"/> for text events.</param>
    /// <param name="type">Kind of event being reported.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        if (type != EventType.RENDER_TEXT) return;

        var info = (TextRenderInfo)data;
        var baseline = info.GetBaseline();
        var start = baseline.GetStartPoint();
        var end = baseline.GetEndPoint();
        var baselineVec = end.Subtract(start);
        var angle = (float)Math.Atan2(baselineVec.Get(1), baselineVec.Get(0));
        var width = baselineVec.Length();

        // A zero-length baseline cannot be normalised (the result would be NaN and the chunk
        // could never be compared). Atan2(0, 0) is 0, so the matching direction is +X.
        Vector direction = width > 0f ? baselineVec.Normalize() : new Vector(1, 0, 0);

        allChunks.Add(new TextChunk
        {
            Text = info.GetText(),
            X = start.Get(0),
            Y = start.Get(1),
            Width = width,
            Angle = angle,
            BaselineDirection = direction,
            FontName = info.GetFont().GetFontProgram().ToString(), // Simplified extraction
            FontSize = info.GetFontSize(),
            Color = info.GetFillColor()?.ToString() ?? "Black"
        });
    }

    /// <summary>This listener only needs text render events.</summary>
    public ICollection<EventType> GetSupportedEvents() => new HashSet<EventType> { EventType.RENDER_TEXT };

    /// <summary>
    /// Assigns every captured chunk to the first existing group with matching angle, direction,
    /// font name, font size and colour that also lies nearby; otherwise starts a new group.
    /// Each group's chunks are finally sorted along the group's baseline direction.
    /// </summary>
    /// <returns>The groups, in order of creation.</returns>
    public List<TextGroup> GroupTextChunks()
    {
        var groups = new List<TextGroup>();
        const float proximityThreshold = 2.0f;
        const float angleTolerance = 0.02f;
        const float fontSizeTolerance = 0.001f; // avoids exact float equality on font sizes

        foreach (var chunk in allChunks)
        {
            var matchedGroup = groups.FirstOrDefault(g =>
                AngleDifference(g.Angle, chunk.Angle) < angleTolerance &&
                g.BaselineDirection.Dot(chunk.BaselineDirection) > 0.99 &&
                g.FontName == chunk.FontName &&
                Math.Abs(g.FontSize - chunk.FontSize) < fontSizeTolerance &&
                g.Color == chunk.Color &&
                g.IsNearby(chunk, proximityThreshold));

            if (matchedGroup == null)
            {
                matchedGroup = new TextGroup
                {
                    Angle = chunk.Angle,
                    FontName = chunk.FontName,
                    FontSize = chunk.FontSize,
                    Color = chunk.Color,
                    BaselineDirection = chunk.BaselineDirection
                };
                groups.Add(matchedGroup);
            }
            matchedGroup.Chunks.Add(chunk);
        }

        // Sort each group by position along its baseline. The origin is captured before
        // sorting so the comparison does not depend on which element is first mid-sort.
        foreach (var group in groups)
        {
            Vector origin = group.Chunks[0].Position;
            Vector direction = group.BaselineDirection;
            group.Chunks.Sort((a, b) =>
                a.Position.Subtract(origin).Dot(direction)
                    .CompareTo(b.Position.Subtract(origin).Dot(direction)));
        }
        return groups;
    }

    /// <summary>
    /// Smallest absolute difference between two angles in radians, treating angles that
    /// differ by a full turn as equal (so values just either side of +/-pi compare as close).
    /// </summary>
    private static float AngleDifference(float a, float b)
    {
        float diff = Math.Abs(a - b);
        return diff > Math.PI ? (float)(2 * Math.PI) - diff : diff;
    }

    /// <summary>One text render operation: text, baseline start, length, direction and style.</summary>
    public class TextChunk
    {
        public string Text;
        public float X, Y, Width, Angle, FontSize;
        public string FontName, Color;
        public Vector BaselineDirection;

        /// <summary>Baseline start point as a vector with z = 0.</summary>
        public Vector Position => new Vector(X, Y, 0);

        /// <summary>
        /// Signed distance of this chunk's start from the group's first chunk, measured along
        /// the group's baseline direction. The group must already contain at least one chunk.
        /// </summary>
        public float ProjectedPosition(TextGroup g) =>
            Position.Subtract(g.Chunks[0].Position).Dot(g.BaselineDirection);
    }

    /// <summary>A run of chunks with a common style and baseline direction.</summary>
    public class TextGroup
    {
        public List<TextChunk> Chunks = new List<TextChunk>();
        public string FontName, Color;
        public float FontSize, Angle;
        public Vector BaselineDirection;

        /// <summary>
        /// True when <paramref name="c"/> continues one of the chunks already in the group:
        /// it must lie on (almost) the same baseline and the gap between the facing ends of
        /// the two chunks must be below the threshold.
        /// </summary>
        /// <remarks>
        /// The previous test compared only the projected start points. That merged text from
        /// different parallel lines with similar projections, and never merged neighbouring
        /// chunks whose own width exceeded the threshold.
        /// </remarks>
        /// <param name="c">Candidate chunk.</param>
        /// <param name="threshold">Maximum allowed perpendicular offset and end-to-start gap.</param>
        public bool IsNearby(TextChunk c, float threshold)
        {
            return Chunks.Any(existing =>
            {
                Vector offset = c.Position.Subtract(existing.Position);
                float along = offset.Dot(BaselineDirection);
                // BaselineDirection is a unit vector, so the cross-product length is the
                // perpendicular distance of the candidate from the existing chunk's baseline.
                float across = offset.Cross(BaselineDirection).Length();
                if (across >= threshold)
                {
                    return false;
                }
                // Gap between the end of whichever chunk comes first and the start of the other;
                // a negative value means the chunks overlap, which also counts as nearby.
                float gap = along >= 0f ? along - existing.Width : -along - c.Width;
                return gap < threshold;
            });
        }

        /// <summary>Concatenates the chunk texts in list order, separated by single spaces.</summary>
        public string GetMergedText()
        {
            // NOTE(review): when a PDF emits one chunk per glyph this puts a space between
            // every character - confirm whether a width-based separator is wanted instead.
            return string.Join(" ", Chunks.Select(c => c.Text));
        }

        /// <summary>
        /// Formats the group as one comma-separated row: page, font name, size, colour,
        /// angle (radians) and merged text.
        /// </summary>
        /// <remarks>
        /// Numbers use the invariant culture so the decimal separator never collides with the
        /// field separator. Font name and merged text are quoted with embedded quotes doubled,
        /// so commas or quotes inside the text no longer corrupt the row.
        /// </remarks>
        /// <param name="pageNumber">Page the group belongs to.</param>
        public string ToDatabaseRow(int pageNumber)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            string Quote(string value) => "\"" + (value ?? string.Empty).Replace("\"", "\"\"") + "\"";

            return string.Join(",",
                pageNumber.ToString(invariant),
                Quote(FontName),
                FontSize.ToString(invariant),
                Color,
                Angle.ToString(invariant),
                Quote(GetMergedText()));
        }
    }
}// public class DxfSmartEngineeringTextGrouper : IEventListener
________________________________________
✅ Output Table Format for DB Insert
Page | Font Name | Size | Color | Angle (rad) | Merged Text
1 | Arial | 10.5 | Black | 0.03 | IS 800:2007 Section 8.2
1 | Arial | 10.5 | Black | 1.57 | Column A-A Elevation
________________________________________
✅ Final Notes
• Engineering documents often break glyphs into individual characters. Grouping depends on slope and style.
• The strategy mimics the text unbreaking techniques seen in Bluebeam or Tekla’s import layers.
• This strategy assumes linear placement along a single direction vector.
• It is ready to be exported into DXF or inserted into SQL.
If you want, I can also write:
• SQL CREATE TABLE scripts for this output
• DXF text inserter that uses this group list
Would you like that next?
How do I call this class on a PDF document? Please also implement the following:
• SQL CREATE TABLE scripts for this output
• DXF text inserter that uses this group list
Detailed Article on Optimization Workflow for Pre-Engineered Buildings (PEB) ERP System
Title: Integrated Structural Optimization for Pre-Engineered Buildings Using STAAD.Pro and ERP Systems
Objective: To develop a system that automates design iterations for PEB (Pre-Engineered Buildings), achieving optimal structural design at a cost of less than 0.60 INR per kg, using STAAD.Pro analysis data integrated with ERP modules.
1. Overview of System Architecture
Subgraph: ERP System - Optimization Module
subgraph ERP System - Optimization Module
E --> F[Module 1: STAAD File Parser (Reader)]
F --> G{Extracts Data:
- Geometry (Nodes, Members)
- Load Cases & Combinations
- Initial Section Properties
- Analysis Results (Fx, Fy, Fz, Mx, My, Mz)
- Current Structure Weight
}
G --> H{Iteration Counter: iter = 1};
H --> I{Module 2: Design Optimization Engine};
subgraph Optimization Loop
I --> J[Identify Critical Members & Over/Under-Designed Sections]
J --> K[Generate Candidate Tapered Section Dimensions (GA/Gradient Descent)]
K --> L{Check Constraints (IS 800, IS 875, MBMA, AISC):
- Axial, Bending, Shear
- d/t, b/d Ratios
- Slenderness Limits
- Local Buckling
- Fabrication Feasibility
}
L -- "Valid Design?" --> M{No: Penalize/Discard Candidate};
M --> K;
L -- "Yes: Optimal?" --> N{Select Optimal Dimensions};
N --> O[Module 3: STAAD File Generator (Writer)]
O --> P[Generate NEW .std File with Revised Sections]
end
P --> Q(Trigger External STAAD Analysis);
Q --> R[Read New STAAD Output];
R --> S{Compare Weight & Check Convergence};
S -- "Improved?" --> H;
2. Detailed Section Sizing Procedure
At each iteration, for each member and each 20 mm segment, the following computations are performed:
2.1 Forces and Moments at Nodes and Segments
Fx, Fy, Fz
Mx, My, Mz
Use interpolation for mid-segment forces from end values.
2.2 Design Checks Per Segment
Compute required Area, Ixx, Iyy.
Determine Tension capacity: $T = f_y \cdot A_{net}$
Determine Compression capacity: $C = f_{cd} \cdot A_{gross}$ with slenderness checks:
$\lambda = KL/r$
Buckling curves from IS 800
Bending moment capacities:
$M_d = f_b \cdot Z$ (plastic or elastic section modulus)
Shear capacity: $V_d = f_v \cdot A_w$
Combined stresses (Interaction equations):
$\frac{P}{P_d} + \frac{M}{M_d} \leq 1.0$
2.3 Section Geometry Variables
Top Flange Width (TFW)
Top Flange Thickness (TFT)
Bottom Flange Width (BFW)
Bottom Flange Thickness (BFT)
Web Thickness (WT)
Tapered Web Start Depth (WDS)
Tapered Web End Depth (WDE)
2.4 Derived Ratios and Conditions
$d/t$ ratio
$b/d$ ratio
Check for Local Buckling
Fabrication limits (min/max taper, max flange width, etc.)
3. Optimization Engine: Implementation Details
3.1 Algorithm
Type: Genetic Algorithm or Gradient-Based Optimization
Fitness Function:
Minimize Total Weight = $\sum_i \rho_i \cdot V_i$
Penalize constraint violations
Selection: Best K candidates from previous population
Mutation: Random variation in flange/web thicknesses
Crossover: Combine sections across members
3.2 Design Variables Vector
Each member’s section is described by:
[TFW, TFT, BFW, BFT, WDS, WDE, WT]
3.3 Constraint Enforcement
Hard filters on IS 800/IS 875 limits
Soft penalties for near-bound violations
4. Connection and Support Conditions
Read from the STAAD file and reassigned iteratively
For each member:
End releases (MY, MZ)
Support fixity (pinned/fixed/roller)
Automatically modify support types if deflection or moment transfer is excessive
5. Visualization and Reporting
Show before-after comparison:
Total Structure Weight
Section Utilization Ratios
Maximum Displacements
Failed Constraints Summary
6. Final Output Generation
.STD file with optimized member sections
DXF and CSV summary of member properties
JSON export for ERP integration
Drawing suggestions for fabrication drawings based on optimal tapered profiles
7. Benefits and Expected Outcomes
Design cost < ₹0.60/kg
Reuse of iterations through ERP-managed cache
Optimization within 2-3 cycles for common span/loads
Output sections manufacturable as per IS fabrication norms
8. DXF Text Inserter Implementation
/// <summary>Writes grouped text chunks to a minimal DXF file (ENTITIES section only).</summary>
public static class DxfTextInserter
{
    /// <summary>
    /// Creates (or overwrites) the file at <paramref name="dxfPath"/> and writes one TEXT
    /// entity per chunk of every group: layer "0", insertion point at the chunk start,
    /// height from the chunk font size, rotation converted from radians to degrees.
    /// </summary>
    /// <remarks>
    /// Numbers are written with the invariant culture. DXF requires '.' as the decimal
    /// separator, and the previous culture-sensitive formatting wrote values such as
    /// "12,5" on machines with a comma-decimal locale.
    /// </remarks>
    /// <param name="dxfPath">Output file path.</param>
    /// <param name="textGroups">Groups whose chunks are written in list order.</param>
    public static void InsertTextGroupToDXF(string dxfPath, List<DxfSmartEngineeringTextGrouper.TextGroup> textGroups)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        using (var writer = new StreamWriter(dxfPath))
        {
            writer.WriteLine("0\nSECTION\n2\nENTITIES");
            foreach (var group in textGroups)
            {
                foreach (var chunk in group.Chunks)
                {
                    double angleDegrees = chunk.Angle * 180.0 / Math.PI; // DXF group 50 is in degrees
                    writer.WriteLine("0\nTEXT");
                    writer.WriteLine("8\n0");
                    writer.WriteLine("10\n" + chunk.X.ToString(invariant) + "\n20\n" + chunk.Y.ToString(invariant));
                    writer.WriteLine("40\n" + chunk.FontSize.ToString(invariant));
                    writer.WriteLine("1\n" + chunk.Text);
                    writer.WriteLine("50\n" + angleDegrees.ToString(invariant));
                }
            }
            writer.WriteLine("0\nENDSEC\n0\nEOF");
        }
    }
}
9. SQL CREATE TABLE for Text Grouping Report
CREATE TABLE EngineeringTextGroups (
PageNumber INT,
FontName VARCHAR(100),
FontSize FLOAT,
Color VARCHAR(50),
AngleRadians FLOAT,
MergedText TEXT
);
10. Calling the Grouper on a PDF Document
/// <summary>
/// Runs the smart text grouper over every page of a PDF and writes one DXF file per page,
/// named output_page_N.dxf (a relative path).
/// </summary>
/// <param name="pdfPath">Path of the PDF to read.</param>
public static void ProcessPdfWithGrouper(string pdfPath)
{
    using var pdfReader = new PdfReader(pdfPath);
    using var pdfDocument = new PdfDocument(pdfReader);

    int pageNumber = 1;
    while (pageNumber <= pdfDocument.GetNumberOfPages())
    {
        // A fresh grouper per page keeps chunks from different pages apart.
        var pageGrouper = new DxfSmartEngineeringTextGrouper();
        var pageProcessor = new PdfCanvasProcessor(pageGrouper);
        pageProcessor.ProcessPageContent(pdfDocument.GetPage(pageNumber));

        var pageGroups = pageGrouper.GroupTextChunks();
        DxfTextInserter.InsertTextGroupToDXF($"output_page_{pageNumber}.dxf", pageGroups);
        pageNumber++;
    }
}
This closes the loop: you can parse PDF text, group it smartly, export DXF, and log to SQL.
// Lets the user pick a PDF and dumps the plain text of every page to "<pdf path>_REPORT.TXT".
// The dialog is wrapped in a using block so it is disposed once the choice has been handled;
// previously it was never disposed.
using (OpenFileDialog openFileDialog = new OpenFileDialog
{
    Filter = "PDF files (*.pdf)|*.pdf",
    Title = "Select a PDF file"
})
{
    if (openFileDialog.ShowDialog() == DialogResult.OK)
    {
        string pdfPath = openFileDialog.FileName;
        string outputPath = pdfPath + "_REPORT.TXT"; // alternative considered: Path.ChangeExtension(pdfPath, ".txt")
        using (PdfReader pdfReader = new PdfReader(pdfPath))
        using (PdfDocument pdfDocument = new PdfDocument(pdfReader))
        using (StreamWriter writer = new StreamWriter(outputPath, false, Encoding.UTF8))
        {
            for (int page = 1; page <= pdfDocument.GetNumberOfPages(); page++)
            {
                writer.WriteLine($"Page {page}:");
                PdfPage pdfPage = pdfDocument.GetPage(page);
                // A new strategy per page, so text from earlier pages is not carried over.
                DETA7ExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string pageContent = PdfTextExtractor.GetTextFromPage(pdfPage, strategy);
                writer.WriteLine(pageContent);
                writer.WriteLine(); // blank line between pages
            }
        }
        // The report file is closed before the user is notified.
        Console.WriteLine($"PDF content has been extracted to {outputPath}");
        System.Windows.Forms.MessageBox.Show($"PDF content has been extracted to {outputPath}");
    }
}
using System;
using System.Collections.Generic;
using System.Text;
using DETA7.Commons.Utils;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf.Canvas.Parser;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
namespace DETA7.Kernel.Pdf.Canvas.Parser.Listener
{
/// <summary>
/// Text extraction strategy that appends text in the order it is rendered, inserting a
/// line break when a chunk leaves the previous chunk's baseline and a single space when
/// there is a visible gap between consecutive chunks on the same line.
/// </summary>
public class SimpleTextExtractionStrategy : DETA7ExtractionStrategy
{
    /// <summary>Baseline start of the most recently appended chunk.</summary>
    public Vector lastStart;
    /// <summary>Baseline end of the most recently appended chunk.</summary>
    public Vector lastEnd;
    /// <summary>used to store the resulting String.</summary>
    public StringBuilder result = new StringBuilder();

    /// <summary>Handles a parser event; only RENDER_TEXT events have any effect.</summary>
    /// <param name="data">Event payload; cast to <see cref="TextRenderInfo"/> for text events.</param>
    /// <param name="type">Kind of event being reported.</param>
    public virtual void EventOccurred(IEventData data, EventType type)
    {
        if (!type.Equals(EventType.RENDER_TEXT))
        {
            return;
        }

        TextRenderInfo textInfo = (TextRenderInfo)data;
        // Nothing accumulated yet means there is no previous chunk to compare against.
        bool hasEarlierText = result.Length != 0;
        LineSegment baseline = textInfo.GetBaseline();
        Vector currentStart = baseline.GetStartPoint();
        Vector currentEnd = baseline.GetEndPoint();

        if (hasEarlierText)
        {
            if (IsOffPreviousLine(currentStart))
            {
                AppendTextChunk("\n");
            }
            else if (NeedsImpliedSpace(textInfo, currentStart))
            {
                AppendTextChunk(" ");
            }
        }

        AppendTextChunk(textInfo.GetText());
        lastStart = currentStart;
        lastEnd = currentEnd;
    }

    /// <summary>
    /// True when <paramref name="start"/> lies more than the same-line threshold away from the
    /// line through the previous chunk's baseline.
    /// </summary>
    /// <remarks>
    /// Only the start point is tested; a change of text angle without displacement is not
    /// detected. The threshold is a fixed 1 rather than being derived from font metrics.
    /// </remarks>
    private bool IsOffPreviousLine(Vector start)
    {
        // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
        Vector previousBaseline = lastEnd.Subtract(lastStart);
        float distance = previousBaseline.Cross(lastStart.Subtract(start)).LengthSquared()
            / previousBaseline.LengthSquared();
        const float sameLineThreshold = 1f;
        return distance > sameLineThreshold;
    }

    /// <summary>
    /// True when a blank should be inserted before the new chunk: neither side already
    /// supplies a space and the gap from the previous chunk's end to the new start exceeds
    /// half the width of a single space.
    /// </summary>
    private bool NeedsImpliedSpace(TextRenderInfo textInfo, Vector start)
    {
        if (result[result.Length - 1] == ' ')
        {
            return false;
        }
        string incoming = textInfo.GetText();
        if (incoming.Length == 0 || incoming[0] == ' ')
        {
            return false;
        }
        float gap = lastEnd.Subtract(start).Length();
        return gap > textInfo.GetSingleSpaceWidth() / 2f;
    }

    /// <summary>Reports the event types this strategy listens to.</summary>
    /// <returns>An unmodifiable set containing only RENDER_TEXT.</returns>
    public virtual ICollection<EventType> GetSupportedEvents()
    {
        var onlyRenderText = JavaCollectionsUtil.SingletonList(EventType.RENDER_TEXT);
        var eventSet = new LinkedHashSet<EventType>(onlyRenderText);
        return JavaCollectionsUtil.UnmodifiableSet(eventSet);
    }

    /// <summary>Returns the result so far.</summary>
    /// <returns>a String with the resulting text.</returns>
    public virtual String GetResultantText()
    {
        return result.ToString();
    }

    /// <summary>Used to actually append text to the text results.</summary>
    /// <remarks>
    /// Subclasses can use this to insert text that wouldn't normally be included in text
    /// parsing (e.g. result of OCR performed against image content).
    /// </remarks>
    /// <param name="text">the text to append to the text results accumulated so far</param>
    public void AppendTextChunk(String text)
    {
        result.Append(text);
    }
}
}
namespace saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
{
//for tekla pdfexchanges the mediabox dont exists
//////private Stack<GraphicsState> graphicsStateStack = new Stack<GraphicsState>();
//////private GraphicsState currentGraphicsState = new GraphicsState();
//////public void EventOccurred(IEventData data, EventType type)
//////{
////// if (type == EventType.RENDER_PATH)
////// {
////// PathRenderInfo renderInfo = (PathRenderInfo)data;
////// ProcessPath(renderInfo);
////// }
////// else if (type == EventType.RENDER_TEXT)
////// {
////// TextRenderInfo renderInfo = (TextRenderInfo)data;
////// ProcessText(renderInfo);
////// }
////// else if (type == EventType.SAVE_GRAPHICS_STATE)
////// {
////// SaveGraphicsState();
////// }
////// else if (type == EventType.RESTORE_GRAPHICS_STATE)
////// {
////// RestoreGraphicsState();
////// }
////// else if (type == EventType.MODIFY_CTM)
////// {
////// ModifyCtm((Matrix)data);
////// }
//////}
//////public ICollection<EventType> GetSupportedEvents()
//////{
////// return new HashSet<EventType>
////// {
////// EventType.RENDER_PATH,
////// EventType.RENDER_TEXT,
////// EventType.SAVE_GRAPHICS_STATE,
////// EventType.RESTORE_GRAPHICS_STATE,
////// EventType.MODIFY_CTM
////// };
//////}
//this is the Graphics and Text extraction Listener
//this not only extracts the graphics it is to extract the texts also
public class GraphicsAndText_ExtractionListener : IEventListener
{
// One human-readable description per rendered path (filled in EventOccurred).
private readonly List<string> graphicsData = new List<string>();
// One human-readable description per rendered text chunk (filled in EventOccurred).
private readonly List<string> ListOfStringAsTextDataOnlys = new List<string>();
// DXF entity strings; AddTextContentsToToDxfData appends TEXT entities here.
private readonly List<string> dxfData = new List<string>();
// Page context set by SetPageInfo before a page is processed.
private int currentPageNumber;
private float currentPageWidth;
private float currentPageHeight;
// NOTE(review): neither matrix is referenced in the part of the class visible here - confirm they are still needed.
private Matrix currentTransformationMatrix = new Matrix();
private Matrix currentSAANTextTransformationMatrix = new Matrix();
/// <summary>Stores the number and size of the page whose content is about to be processed.</summary>
/// <param name="pageNumber">Page number of the page.</param>
/// <param name="pageWidth">Page width.</param>
/// <param name="pageHeight">Page height.</param>
public void SetPageInfo(int pageNumber, float pageWidth, float pageHeight)
{
    (currentPageNumber, currentPageWidth, currentPageHeight) = (pageNumber, pageWidth, pageHeight);
}
/// <summary>
/// Handles path and text render events: each is logged as a description string and
/// converted to DXF data for the current page. Any other event type is ignored.
/// </summary>
/// <param name="data">Event payload; its concrete type depends on <paramref name="type"/>.</param>
/// <param name="type">Kind of event being reported.</param>
public void EventOccurred(IEventData data, EventType type)
{
    if (type == EventType.RENDER_PATH)
    {
        var pathInfo = (PathRenderInfo)data;
        graphicsData.Add(GetPathDetails(pathInfo));
        AddToDxfData(pathInfo, currentPageNumber);
        return;
    }

    // Earlier experiments handled BEGIN_TEXT and END_TEXT the same way as RENDER_TEXT
    // (casting the payload to TextRenderInfo); they are disabled and only RENDER_TEXT is
    // acted on. Original note: graphics looked fine for Revit PDFs but texts were not coming.
    if (type != EventType.RENDER_TEXT)
    {
        return;
    }

    var textInfo = (TextRenderInfo)data;
    string renderedText = textInfo.GetText(); // read as in the original; the value is not used further
    ListOfStringAsTextDataOnlys.Add(GetTextDetails(textInfo));
    AddTextContentsToToDxfData(textInfo, currentPageNumber);
}
//////public ICollection<EventType> GetSupportedEvents()
//////{
////// return new HashSet<EventType> { EventType.RENDER_PATH };
//////}// public ICollection<EventType> GetSupportedEvents()
/// <summary>Lists the event types this listener asks the canvas processor to deliver.</summary>
/// <returns>A set with BEGIN_TEXT, RENDER_PATH, RENDER_TEXT and END_TEXT.</returns>
public ICollection<EventType> GetSupportedEvents()
{
    // Graphics-state events (save/restore state, CTM changes) were tried and left disabled.
    var subscribed = new HashSet<EventType>();
    subscribed.Add(EventType.BEGIN_TEXT);
    subscribed.Add(EventType.RENDER_PATH);
    subscribed.Add(EventType.RENDER_TEXT);
    subscribed.Add(EventType.END_TEXT);
    return subscribed;
}
/// <summary>
/// Produces a multi-line description of a rendered path: the current page info, the path
/// operation and rule, then every line segment and every Bezier control point after
/// transformation with the path's CTM.
/// </summary>
/// <param name="renderInfo">The path render event to describe.</param>
/// <returns>The description text; segments that are neither lines nor Bezier curves are omitted.</returns>
private string GetPathDetails(PathRenderInfo renderInfo)
{
    var report = new StringBuilder();
    report.AppendLine($"Page Number: {currentPageNumber}, Page Width: {currentPageWidth}, Page Height: {currentPageHeight}");
    report.AppendLine("Path Details:");
    report.AppendLine($"Operation: {renderInfo.GetOperation()}");
    report.AppendLine($"Rule: {renderInfo.GetRule()}");

    foreach (Subpath subpath in renderInfo.GetPath().GetSubpaths())
    {
        foreach (IShape segment in subpath.GetSegments())
        {
            switch (segment)
            {
                case Line straight:
                {
                    float[] start = TransformPoint(straight.p1, renderInfo.GetCtm());
                    float[] end = TransformPoint(straight.p2, renderInfo.GetCtm());
                    report.AppendLine($"Line: Start({start[0]}, {start[1]}) - End({end[0]}, {end[1]})");
                    break;
                }
                case BezierCurve bezier:
                {
                    int pointIndex = 0;
                    foreach (DETA7.Kernel.Geom.Point controlPoint in bezier.controlPoints)
                    {
                        float[] transformedControlPoint = TransformPoint(controlPoint, renderInfo.GetCtm());
                        report.AppendLine($"Curve: Control Point {pointIndex} - x: {transformedControlPoint[0]}, y: {transformedControlPoint[1]}");
                        pointIndex++;
                    }
                    break;
                }
            }
        }
    }
    return report.ToString();
}
//////private string GetTextDetails(TextRenderInfo FoundTextRenderInfo)
//////{
////// StringBuilder detailsForTexts = new StringBuilder();
////// // To implement
////// detailsForTexts.AppendLine("Text= " +FoundTextRenderInfo.text);
////// detailsForTexts.AppendLine("textMatrix = " + FoundTextRenderInfo.textMatrix.ToString());
////// detailsForTexts.AppendLine("GetUnscaledWidth = " + FoundTextRenderInfo.GetUnscaledWidth().ToString());
////// return detailsForTexts.ToString();
//////}//private string GetTextDetails(TextRenderInfo FoundTextRenderInfo)
//////private void AddTextContentsToToDxfData(TextRenderInfo FoundTextRenderInfo, int pageNumber)
//////{
////// float offsetspageswises = 30000 * (pageNumber - 1);
////// DETA7.Kernel.Geom.Point TextInsertionsPoint= FoundTextRenderInfo.
////// float[] transformedControlPoint = TransformPoint(controlPoint, FoundTextRenderInfo.GetCtm());
////// To implement dxf generations codes
////// renderInfo.
//////}//private void AddTextContentsToToDxfData(TextRenderInfo renderInfo, int pageNumber)
/// <summary>
/// Produces a multi-line description of one text render event for the raw text log:
/// text, text matrix, unscaled width, baseline start and end, font size and text rise.
/// </summary>
/// <param name="FoundTextRenderInfo">The text render event to describe.</param>
/// <returns>The description, one "label = value" line per property.</returns>
private string GetTextDetails(TextRenderInfo FoundTextRenderInfo)
{
    var report = new StringBuilder();
    report.Append("Text = ").AppendLine(FoundTextRenderInfo.GetText());
    report.Append("Text Matrix = ").AppendLine(FoundTextRenderInfo.GetTextMatrix().ToString());
    report.Append("Unscaled Width = ").AppendLine(FoundTextRenderInfo.GetUnscaledWidth().ToString());

    LineSegment textBaseline = FoundTextRenderInfo.GetBaseline();
    report.Append("Baseline Start = ").AppendLine(textBaseline.GetStartPoint().ToString());
    report.Append("Baseline End = ").AppendLine(textBaseline.GetEndPoint().ToString());

    report.Append("Font Size = ").AppendLine(FoundTextRenderInfo.GetFontSize().ToString());
    report.Append("Text Rise = ").AppendLine(FoundTextRenderInfo.GetRise().ToString());
    return report.ToString();
}
/// <summary>
/// Converts one text render event into DXF TEXT entities and appends them to the DXF data:
/// the real text on a layer named after its style, followed by a tiny "SANJOYNATH" marker
/// text on layer 0 at the same position.
/// </summary>
/// <remarks>
/// Changes compared with the previous version:
/// numbers are formatted with the invariant culture (DXF needs '.' as decimal separator);
/// the fill colour is interpreted by its number of components instead of always reading
/// three, which used to throw for one-component (grey) colours and silently turn them into
/// ACI 6; a missing fill colour no longer throws; unused locals were removed.
/// Output for three-component colours is unchanged on machines with a '.' decimal locale.
/// </remarks>
/// <param name="FoundTextRenderInfo">The text render event to convert.</param>
/// <param name="pageNumber">1-based page number; each page is shifted 30000 units further along X.</param>
private void AddTextContentsToToDxfData(TextRenderInfo FoundTextRenderInfo, int pageNumber)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const int unknownColorAci = 6; // same fallback the previous error handler used

    float offsetspageswises = 30000 * (pageNumber - 1);

    // Insertion point and rotation come from the baseline.
    LineSegment baseline = FoundTextRenderInfo.GetBaseline();
    Vector start = baseline.GetStartPoint();
    Vector end = baseline.GetEndPoint();
    float x = start.Get(Vector.I1);
    float y = start.Get(Vector.I2);
    float dx = end.Get(Vector.I1) - x;
    float dy = end.Get(Vector.I2) - y;
    float rotationAngle = (float)(Math.Atan2(dy, dx) * (180.0 / Math.PI)); // DXF expects degrees

    // The ascent line length is used as text height; per the original note it is more
    // accurate than GetFontSize(), although DXF heights still do not always match the PDF.
    float textHeight = FoundTextRenderInfo.GetAscentLine().GetLength();

    string text = FoundTextRenderInfo.GetText();
    float fontSize = FoundTextRenderInfo.GetFontSize();
    string fontName = FoundTextRenderInfo.GetFont().GetFontProgram().GetFontNames().GetFontName();
    float wordspacingfound = FoundTextRenderInfo.GetWordSpacing();

    DETA7.Kernel.Colors.Color fillColor = FoundTextRenderInfo.GetFillColor();
    string color = fillColor != null ? fillColor.ToString() : "NoFillColor";
    int aciColor = ResolveAciColor(fillColor != null ? fillColor.GetColorValue() : null, unknownColorAci);

    // Layer name encodes the style; characters that are awkward in layer names become '_'.
    string layerName = string.Format(invariant, "{0}_{1}_{2}_{3}_{4}", fontSize, rotationAngle, wordspacingfound, fontName, color);
    layerName = layerName.Replace(",", "_")
        .Replace(".", "_").Replace(";", "_").Replace(" ", "_")
        .Replace("+", "_").Replace("/", "_").Replace("\\", "_")
        .Replace("DETA7_Kernel_Colors_", "");

    string dxfTextEntity = string.Format(
        invariant,
        "0\nTEXT\n8\n{0}\n10\n{1}\n20\n{2}\n30\n0.0\n40\n{3}\n50\n{4}\n62\n{5}\n1\n{6}",
        layerName, offsetspageswises + x, y, textHeight, rotationAngle, aciColor, text);

    // Marker text on layer 0 with a fixed height of 0.01.
    string dxfTextEntity_saans = string.Format(
        invariant,
        "0\nTEXT\n8\n0\n10\n{0}\n20\n{1}\n30\n0.0\n40\n{2}\n50\n{3}\n1\n",
        offsetspageswises + x, y, 0.01, rotationAngle) + "SANJOYNATH";

    dxfData.Add(dxfTextEntity);
    dxfData.Add(dxfTextEntity_saans);
}

/// <summary>
/// Maps raw colour components to the closest ACI colour, or returns the fallback when the
/// components are missing or have an unsupported count.
/// </summary>
/// <param name="components">Colour components as returned by the colour object, or null.</param>
/// <param name="fallbackAci">ACI value to use when the colour cannot be interpreted.</param>
private int ResolveAciColor(float[] components, int fallbackAci)
{
    if (components == null)
    {
        return fallbackAci;
    }

    int r, g, b;
    switch (components.Length)
    {
        case 1:
            // assumes a single component is a grey level (0 = black) - TODO confirm for non-grey colour spaces
            r = g = b = (int)(components[0] * 255);
            break;
        case 3:
            // treated as RGB, as before
            r = (int)(components[0] * 255);
            g = (int)(components[1] * 255);
            b = (int)(components[2] * 255);
            break;
        case 4:
            // assumes CMYK; simple conversion without colour management
            float keep = 1f - components[3];
            r = (int)(255 * (1f - components[0]) * keep);
            g = (int)(255 * (1f - components[1]) * keep);
            b = (int)(255 * (1f - components[2]) * keep);
            break;
        default:
            return fallbackAci;
    }
    return GetClosestAciColor(r, g, b);
}
/// <summary>
/// Partial AutoCAD Color Index (ACI) palette used by <see cref="GetClosestAciColor"/>.
/// Kept as an ordered array so that ties are always resolved in favour of the
/// earlier entry, and so that the table is built once instead of on every call.
/// </summary>
private static readonly (int Aci, int R, int G, int B)[] BasicAciPalette =
{
    (1, 255, 0, 0),     // Red
    (2, 255, 255, 0),   // Yellow
    (3, 0, 255, 0),     // Green
    (4, 0, 255, 255),   // Cyan
    (5, 0, 0, 255),     // Blue
    (6, 255, 0, 255),   // Magenta
    (7, 255, 255, 255), // White
    (8, 128, 128, 128), // Gray
    (9, 192, 192, 192)  // Light Gray
};

/// <summary>
/// Returns the ACI index (1-9) of the palette entry nearest to the given RGB color,
/// measured by Euclidean distance in RGB space.
/// </summary>
/// <param name="r">Red component, normally 0-255.</param>
/// <param name="g">Green component, normally 0-255.</param>
/// <param name="b">Blue component, normally 0-255.</param>
/// <returns>The ACI index of the closest palette entry; 7 (white) if nothing compares closer.</returns>
private int GetClosestAciColor(int r, int g, int b)
{
    int closestIndex = 7; // Default to white
    double minDistanceSquared = double.MaxValue;
    foreach (var (aci, paletteR, paletteG, paletteB) in BasicAciPalette)
    {
        double deltaR = r - paletteR;
        double deltaG = g - paletteG;
        double deltaB = b - paletteB;
        // Comparing squared distances gives the same ordering as comparing the
        // square roots, so the Math.Sqrt call is unnecessary.
        double distanceSquared = deltaR * deltaR + deltaG * deltaG + deltaB * deltaB;
        if (distanceSquared < minDistanceSquared)
        {
            minDistanceSquared = distanceSquared;
            closestIndex = aci;
        }
    }
    return closestIndex;
}//private int GetClosestAciColor(int r, int g, int b)
/// <summary>
/// Converts one path render event into DXF LINE entities and appends them to <c>dxfData</c>.
/// </summary>
/// <remarks>
/// Every subpath is flattened to straight segments: <c>Line</c> shapes are emitted as-is and
/// each <c>BezierCurve</c> is approximated by ten chords. All points are transformed with the
/// render info's CTM and shifted along X by 30000 units per page so that pages sit side by side.
/// The layer name encodes line width, dash pattern, cap style, join style and stroke color.
/// The DXF color (group 62) is a fixed category code - 22 for straight segments and 11 for
/// Bezier chords - not the stroke color.
/// Numbers are written with the invariant culture because DXF requires '.' as the decimal
/// separator regardless of the machine's regional settings.
/// </remarks>
/// <param name="renderInfo">Path data and graphics state for the path being painted.</param>
/// <param name="pageNumber">1-based page number; determines the X offset.</param>
private void AddToDxfData(PathRenderInfo renderInfo, int pageNumber)
{
    const int LinearSegmentAci = 22;   // category color for straight segments
    const int BezierSegmentAci = 11;   // category color for flattened Bezier chords
    const int BezierChordCount = 10;   // number of chords used to approximate one curve

    float offsetspageswises = 30000 * (pageNumber - 1);

    // Styling information that goes into the layer name.
    float lineWidth = renderInfo.GetLineWidth();
    string lineType = renderInfo.GetLineDashPattern()?.ToString() ?? "Continuous";
    string lineCap = renderInfo.GetLineCapStyle().ToString();
    string lineJoin = renderInfo.GetLineJoinStyle().ToString();
    DETA7.Kernel.Colors.Color strokeColor = renderInfo.GetStrokeColor();

    // Build a DXF-safe layer name: anything that is not a letter, digit or underscore becomes '_'.
    string rawLayerName = $"{lineWidth}_{lineType}_{lineCap}_{lineJoin}_{strokeColor.ToString()}";
    string layerName = System.Text.RegularExpressions.Regex.Replace(rawLayerName, @"[^a-zA-Z0-9_]", "_")
        .Replace("DETA7_Kernel_Colors_", "");

    // The CTM does not change while this single render event is handled, so fetch it once.
    var ctm = renderInfo.GetCtm();

    foreach (Subpath subpath in renderInfo.GetPath().GetSubpaths())
    {
        List<IShape> segments = subpath.GetSegments().ToList();

        // When the subpath starts and ends with straight lines whose outer end points do not
        // coincide, append a closing segment from the last point back to the first.
        // NOTE(review): this also closes subpaths that are genuinely open in the PDF
        // (e.g. stroked polylines) - confirm that this is intended.
        if (segments.Count > 1
            && segments[0] is Line first
            && segments[segments.Count - 1] is Line last
            && !PointsEqual(first.p1, last.p2))
        {
            segments.Add(new Line(last.p2, first.p1));
        }

        foreach (IShape shape in segments)
        {
            if (shape is Line line)
            {
                float[] start = TransformPoint(line.p1, ctm);
                float[] end = TransformPoint(line.p2, ctm);
                dxfData.Add(FormatDxfLineEntity(
                    layerName,
                    offsetspageswises + start[0], start[1],
                    offsetspageswises + end[0], end[1],
                    LinearSegmentAci));
            }
            else if (shape is BezierCurve curve)
            {
                float[] start = TransformPoint(curve.controlPoints[0], ctm);
                float[] control1 = TransformPoint(curve.controlPoints[1], ctm);
                float[] control2 = TransformPoint(curve.controlPoints[2], ctm);
                float[] end = TransformPoint(curve.controlPoints[3], ctm);

                float tStep = 1.0f / BezierChordCount;
                float[] prevPoint = start;
                for (int i = 1; i <= BezierChordCount; i++)
                {
                    float[] point = CalculateBezierPoint(i * tStep, start, control1, control2, end);
                    dxfData.Add(FormatDxfLineEntity(
                        layerName,
                        offsetspageswises + prevPoint[0], prevPoint[1],
                        offsetspageswises + point[0], point[1],
                        BezierSegmentAci));
                    prevPoint = point;
                }
            }
        }
    }

    // TODO (from the original author's notes): consider emitting the LINE entities only after
    // all path data has been accumulated, so that layer names can carry richer classification
    // (closed/open polygons, circle/ellipse/rectangle-like shapes, bounding boxes) that would
    // make filtering engineering drawings easier.
}//// private void AddToDxfData(PathRenderInfo renderInfo, int pageNumber)

/// <summary>
/// Formats a single DXF LINE entity (layer, start point, end point, ACI color) using the
/// invariant culture so that decimal separators are always '.'.
/// </summary>
private static string FormatDxfLineEntity(string layerName, float x1, float y1, float x2, float y2, int aciColor)
{
    return string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        "0\nLINE\n8\n{0}\n10\n{1}\n20\n{2}\n30\n0.0\n11\n{3}\n21\n{4}\n31\n0.0\n62\n{5}",
        layerName, x1, y1, x2, y2, aciColor);
}
/// <summary>
/// Tells whether two points coincide, comparing X and Y independently against a tolerance.
/// </summary>
/// <param name="p1">First point.</param>
/// <param name="p2">Second point.</param>
/// <param name="tolerance">Exclusive upper bound for the absolute difference on each axis.</param>
/// <returns><c>true</c> when both |dx| and |dy| are strictly below <paramref name="tolerance"/>.</returns>
private bool PointsEqual(DETA7.Kernel.Geom.Point p1, DETA7.Kernel.Geom.Point p2, float tolerance = 0.01f)
{
    var horizontalGap = Math.Abs(p1.GetX() - p2.GetX());
    if (!(horizontalGap < tolerance))
    {
        return false;
    }
    var verticalGap = Math.Abs(p1.GetY() - p2.GetY());
    return verticalGap < tolerance;
}//private bool PointsEqual(DETA7.Kernel.Geom.Point p1, DETA7.Kernel.Geom.Point p2, float tolerance = 0.01f)
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// NOT IN USE. Earlier, simpler variant of <c>AddToDxfData</c>: writes every line and every
/// flattened Bezier curve of the path to layer "0" without any styling information.
/// </summary>
/// <remarks>
/// Coordinates are written with the invariant culture because DXF requires '.' as the
/// decimal separator regardless of the machine's regional settings.
/// </remarks>
/// <param name="renderInfo">Path data and graphics state for the path being painted.</param>
/// <param name="pageNumber">1-based page number; each page is shifted 30000 units along X.</param>
private void AddToDxfData___THISWASWORKINGNOWENHANCING(PathRenderInfo renderInfo, int pageNumber)
{
    const string LineEntityFormat = "0\nLINE\n8\n0\n10\n{0}\n20\n{1}\n30\n0.0\n11\n{2}\n21\n{3}\n31\n0.0";
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    float offsetspageswises = 30000 * (pageNumber - 1);
    // The CTM does not change while this single render event is handled, so fetch it once.
    var ctm = renderInfo.GetCtm();

    foreach (Subpath subpath in renderInfo.GetPath().GetSubpaths())
    {
        foreach (IShape shape in subpath.GetSegments())
        {
            if (shape is Line line)
            {
                float[] start = TransformPoint(line.p1, ctm);
                float[] end = TransformPoint(line.p2, ctm);
                dxfData.Add(string.Format(invariant, LineEntityFormat,
                    offsetspageswises + start[0], start[1],
                    offsetspageswises + end[0], end[1]));
            }
            else if (shape is BezierCurve curve)
            {
                float[] start = TransformPoint(curve.controlPoints[0], ctm);
                float[] control1 = TransformPoint(curve.controlPoints[1], ctm);
                float[] control2 = TransformPoint(curve.controlPoints[2], ctm);
                float[] end = TransformPoint(curve.controlPoints[3], ctm);

                // Approximate the Bezier curve with a series of straight chords.
                int numSegments = 10;
                float tStep = 1.0f / numSegments;
                float[] prevPoint = start;
                for (int i = 1; i <= numSegments; i++)
                {
                    float[] point = CalculateBezierPoint(i * tStep, start, control1, control2, end);
                    dxfData.Add(string.Format(invariant, LineEntityFormat,
                        offsetspageswises + prevPoint[0], prevPoint[1],
                        offsetspageswises + point[0], point[1]));
                    prevPoint = point;
                }
            }
        }
    }
}// private void AddToDxfData___THISWASWORKINGNOWENHANCING(PathRenderInfo renderInfo, int pageNumber)
/// <summary>
/// Evaluates a cubic Bezier curve at parameter <paramref name="t"/> using the Bernstein form
/// (1-t)^3*p0 + 3(1-t)^2*t*p1 + 3(1-t)*t^2*p2 + t^3*p3.
/// </summary>
/// <param name="t">Curve parameter; 0 yields <paramref name="p0"/>, 1 yields <paramref name="p3"/>.</param>
/// <param name="p0">Start point as {x, y}.</param>
/// <param name="p1">First control point as {x, y}.</param>
/// <param name="p2">Second control point as {x, y}.</param>
/// <param name="p3">End point as {x, y}.</param>
/// <returns>A new two-element array {x, y} holding the point on the curve.</returns>
private float[] CalculateBezierPoint(float t, float[] p0, float[] p1, float[] p2, float[] p3)
{
    float oneMinusT = 1 - t;
    float tSquared = t * t;
    float oneMinusTSquared = oneMinusT * oneMinusT;

    // Bernstein weights, computed once and shared by both coordinates.
    float weightStart = oneMinusTSquared * oneMinusT;
    float weightControl1 = 3 * oneMinusTSquared * t;
    float weightControl2 = 3 * oneMinusT * tSquared;
    float weightEnd = tSquared * t;

    float[] result = new float[2];
    for (int axis = 0; axis < 2; axis++)
    {
        result[axis] = weightStart * p0[axis];
        result[axis] += weightControl1 * p1[axis];
        result[axis] += weightControl2 * p2[axis];
        result[axis] += weightEnd * p3[axis];
    }
    return result;
}// private float[] CalculateBezierPoint(float t, float[] p0, float[] p1, float[] p2, float[] p3)
////////////private float[] TransformPoint(Vector point, Matrix matrix)
////////////{
//////////// float x = point.Get(Vector.I1);
//////////// float y = point.Get(Vector.I2);
//////////// float transformedX = matrix.Get(Matrix.I11) * x + matrix.Get(Matrix.I12) * y + matrix.Get(Matrix.I13);
//////////// float transformedY = matrix.Get(Matrix.I21) * x + matrix.Get(Matrix.I22) * y + matrix.Get(Matrix.I23);
//////////// return new float[] { transformedX, transformedY };
////////////}// private float[] TransformPoint(Vector point, Matrix matrix)
//////private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix)
//////{
////// float x = (float)point.GetX();
////// float y = (float)point.GetY();
////// float transformedX = matrix.Get(Matrix.I11) * x + matrix.Get(Matrix.I12) * y + matrix.Get(Matrix.I13);
////// float transformedY = matrix.Get(Matrix.I21) * x + matrix.Get(Matrix.I22) * y + matrix.Get(Matrix.I23);
////// return new float[] { transformedX, transformedY };
//////}// private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix)
/// <summary>
/// Applies <paramref name="matrix"/> to the first two components of a <c>Vector</c>.
/// </summary>
/// <param name="point">Vector whose I1 and I2 components are used as x and y.</param>
/// <param name="matrix">Transformation matrix, typically the current transformation matrix.</param>
/// <returns>A new array {x', y'} with the transformed coordinates.</returns>
private float[] TransformPoint(Vector point, Matrix matrix)
{
    float sourceX = point.Get(Vector.I1);
    float sourceY = point.Get(Vector.I2);
    // NOTE(review): the translation terms are read from I13 / I23. This is only correct if
    // Matrix stores the PDF operands e and f at those indices - confirm against the Matrix class.
    return new float[]
    {
        matrix.Get(Matrix.I11) * sourceX + matrix.Get(Matrix.I12) * sourceY + matrix.Get(Matrix.I13),
        matrix.Get(Matrix.I21) * sourceX + matrix.Get(Matrix.I22) * sourceY + matrix.Get(Matrix.I23)
    };
}// private float[] TransformPoint(Vector point, Matrix matrix)
/// <summary>
/// Applies <paramref name="matrix"/> to a geometric point.
/// </summary>
/// <param name="point">Point whose X and Y (narrowed to <c>float</c>) are transformed.</param>
/// <param name="matrix">Transformation matrix, typically the current transformation matrix.</param>
/// <returns>A new array {x', y'} with the transformed coordinates.</returns>
private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix)
{
    float sourceX = (float)point.GetX();
    float sourceY = (float)point.GetY();
    // NOTE(review): the translation terms are read from I13 / I23. This is only correct if
    // Matrix stores the PDF operands e and f at those indices - confirm against the Matrix class.
    return new float[]
    {
        matrix.Get(Matrix.I11) * sourceX + matrix.Get(Matrix.I12) * sourceY + matrix.Get(Matrix.I13),
        matrix.Get(Matrix.I21) * sourceX + matrix.Get(Matrix.I22) * sourceY + matrix.Get(Matrix.I23)
    };
}// private float[] TransformPoint(DETA7.Kernel.Geom.Point point, Matrix matrix)
/// <summary>
/// Appends the collected text-only lines to the graphics dump and writes the combined
/// list to <paramref name="outputFilePath"/>, one entry per line.
/// </summary>
/// <param name="outputFilePath">Destination text file; created or overwritten.</param>
public void WriteGraphicsData(string outputFilePath)
{
    // The text-only lines are added to graphicsData itself, so they stay in that list
    // after this call returns.
    var textOnlyLines = ListOfStringAsTextDataOnlys;
    graphicsData.AddRange(textOnlyLines);

    File.WriteAllLines(outputFilePath, graphicsData);
}// public void WriteGraphicsData(string outputFilePath)
/// <summary>
/// Writes the accumulated DXF entities to <paramref name="outputFilePath"/>, wrapped in a
/// minimal DXF skeleton: empty HEADER, TABLES and BLOCKS sections, the ENTITIES section
/// holding <c>dxfData</c>, an empty OBJECTS section and the EOF marker.
/// </summary>
/// <param name="outputFilePath">Destination DXF file; created or overwritten.</param>
public void WriteDxfData(string outputFilePath)
{
    // Group code / value pairs that precede the entities.
    string[] leadingSections =
    {
        "0", "SECTION", "2", "HEADER", "0", "ENDSEC",
        "0", "SECTION", "2", "TABLES", "0", "ENDSEC",
        "0", "SECTION", "2", "BLOCKS", "0", "ENDSEC",
        "0", "SECTION", "2", "ENTITIES"
    };

    // Closes ENTITIES, adds an empty OBJECTS section and terminates the file.
    string[] trailingSections =
    {
        "0", "ENDSEC",
        "0", "SECTION", "2", "OBJECTS", "0", "ENDSEC",
        "0", "EOF"
    };

    var fileLines = new List<string>();
    fileLines.AddRange(leadingSections);
    fileLines.AddRange(dxfData);
    fileLines.AddRange(trailingSections);

    File.WriteAllLines(outputFilePath, fileLines);
}// public void WriteDxfData(string outputFilePath)
}// public class GraphicsAndText_ExtractionListener : IEventListener
/// <summary>
/// Entry point that runs <c>GraphicsAndText_ExtractionListener</c> over every page of a PDF
/// and writes the results next to the input file as a text dump and a DXF drawing.
/// </summary>
public class PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations
{
    /// <summary>
    /// Processes all pages of the given PDF and writes
    /// "&lt;input&gt;___dumping_pagewises_graphics_data.txt" and
    /// "&lt;input&gt;___pagewises_graphics_data.dxf", then reports completion on the console
    /// and in a message box.
    /// </summary>
    /// <param name="selected_pdffilesnames">Path of the PDF file to read.</param>
    public static void publicstaticvoid___read_inputFilePath_dump_pagewise_graphics_data(string selected_pdffilesnames)
    {
        string inputFilePath = selected_pdffilesnames;
        string outputFilePath = inputFilePath + "___dumping_pagewises_graphics_data.txt";
        string dxfFilePath = inputFilePath + "___pagewises_graphics_data.dxf";

        PdfReader reader = new PdfReader(inputFilePath);
        PdfDocument pdfDoc = new PdfDocument(reader);
        try
        {
            GraphicsAndText_ExtractionListener listener = new GraphicsAndText_ExtractionListener();
            // Create a PdfCanvasProcessor that feeds the listener.
            PdfCanvasProcessor processor = new PdfCanvasProcessor(listener);

            // Process each page; the listener is told the page number and size first.
            int pageCount = pdfDoc.GetNumberOfPages();
            for (int i = 1; i <= pageCount; i++)
            {
                PdfPage page = pdfDoc.GetPage(i);
                float pageWidth = page.GetPageSize().GetWidth();
                float pageHeight = page.GetPageSize().GetHeight();
                listener.SetPageInfo(i, pageWidth, pageHeight);
                processor.ProcessPageContent(page);
            }// for (int i = 1; i <= pageCount; i++)

            // Write the extracted graphics data to a text file.
            listener.WriteGraphicsData(outputFilePath);
            // Write the extracted graphics data to a DXF file.
            listener.WriteDxfData(dxfFilePath);
        }
        finally
        {
            // Close the document even when page processing or file writing fails,
            // so the input PDF is not left open.
            pdfDoc.Close();
        }

        Console.WriteLine("Graphics extraction and DXF generation completed successfully.");
        System.Windows.Forms.MessageBox.Show("Graphics extraction and DXF generation completed successfully.");
    }//public static void publicstaticvoid___read_inputFilePath_dump_pagewise_graphics_data(string selected_pdffilesnames)
}// public class PdfGraphicsExtractor_to_singles_30000_offsets_dxf____trying_with_new_scales_translations
}//namespace saan_trying_pathspositioningscaling_3000_OFFSETS_PAGES_TRANSFORMATIONS_MATRIX_GRAPHIKS_STATES_graphics_data_extractions_pagewises
// Reference: the PdfCanvasProcessor source (namespace DETA7.Kernel.Pdf.Canvas.Parser) follows.
using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using DETA7.Commons;
using DETA7.Commons.Utils;
using DETA7.IO.Font;
using DETA7.IO.Source;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Exceptions;
using DETA7.Kernel.Font;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Logs;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Canvas;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
using DETA7.Kernel.Pdf.Canvas.Parser.Listener;
using DETA7.Kernel.Pdf.Canvas.Parser.Util;
using DETA7.Kernel.Pdf.Colorspace;
using DETA7.Kernel.Pdf.Extgstate;
using System.Text;
//saan finds that this class is used to read the pdf files and to parse the pdf files
namespace DETA7.Kernel.Pdf.Canvas.Parser
{
/// <summary>Processor for a PDF content stream.</summary>
public class PdfCanvasProcessor
{
// Running count of logged processor events; the constructor increments it before each log line it writes.
public static double PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS = 0;
// Page number printed in the second column of every sequence-log line.
// NOTE(review): where this is updated is not visible in this part of the file.
public static double PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING = 0;
// Process-wide sequence log; the constructor appends to it and recreates it if it is null.
public static StringBuilder PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
= new StringBuilder();
// Switch for the sequence log. Note that the constructor sets it back to true on every construction.
public static bool PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG = true;
// Layout of the six PDF matrix operands as a 3x3 matrix:
// a b 0
// c d 0
// e f 1
// matrix.Get(Matrix.I11) IS a
// matrix.Get(Matrix.I12) IS c
// matrix.Get(Matrix.I13) IS e
// matrix.Get(Matrix.I21) IS b
// matrix.Get(Matrix.I22) IS d
// matrix.Get(Matrix.I23) IS f
// matrix.Get(Matrix.I31) IS 0
// matrix.Get(Matrix.I32) IS 0
// matrix.Get(Matrix.I33) IS 1
// NOTE(review): the list above does not agree with the drawing if Ixy means row x, column y
// (row 1, column 2 of the drawing is b, not c; e and f sit in row 3). Confirm the index layout
// against the Matrix class before relying on this mapping.
// Presumably the six operands of the most recent matrix operator, in order a..f - TODO confirm where they are set.
public static float CTM_SAAN__a = 0;// ((PdfNumber)operands[0]).FloatValue();
public static float CTM_SAAN__b = 0;// ((PdfNumber)operands[1]).FloatValue();
public static float CTM_SAAN__c = 0;// ((PdfNumber)operands[2]).FloatValue();
public static float CTM_SAAN__d = 0;// ((PdfNumber)operands[3]).FloatValue();
public static float CTM_SAAN__e = 0;// ((PdfNumber)operands[4]).FloatValue();
public static float CTM_SAAN__f = 0;// ((PdfNumber)operands[5]).FloatValue();
// Second static report buffer; the constructor replaces it with a fresh, empty builder.
public static StringBuilder PUBLIC_STATIC_STRINGBUILDER___SAAN___kernel_pdf_canvas_parser_PdfCanvasProcessor = new StringBuilder();
/// <summary>Name constant "DefaultOperator".</summary>
/// <remarks>NOTE(review): presumably the key under which a fallback operator is registered - confirm in PopulateOperators.</remarks>
public const String DEFAULT_OPERATOR = "DefaultOperator";
/// <summary>Listener that will be notified of render events</summary>
public IEventListener eventListener;
/// <summary>
/// Cache supported events in case the user's
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener.GetSupportedEvents()"/>
/// method is not very efficient
/// </summary>
public ICollection<EventType> supportedEvents;
/// <summary>Path object held by the processor, initialised empty.</summary>
/// <remarks>NOTE(review): presumably the path currently under construction by the path operators - confirm.</remarks>
public Path currentPath = new Path();
/// <summary>
/// Indicates whether the current clipping path should be modified by
/// intersecting it with the current path.
/// </summary>
public bool isClip;
/// <summary>
/// Specifies the filling rule which should be applied while calculating
/// new clipping path.
/// </summary>
public int clippingRule;
/// <summary>A map with all supported operators (PDF syntax).</summary>
public IDictionary<String, IContentOperator> operators;
/// <summary>Resources for the content stream.</summary>
/// <remarks>
/// Resources for the content stream.
/// Current resources are always at the top of the stack.
/// Stack is needed in case if some "inner" content stream with its own resources
/// is encountered (like Form XObject).
/// </remarks>
public IList<PdfResources> resourcesStack;
/// <summary>Stack keeping track of the graphics state.</summary>
public Stack<ParserGraphicsState> gsStack = new Stack<ParserGraphicsState>();
// NOTE(review): presumably the PDF text matrix and text line matrix - confirm against the text operators.
public Matrix textMatrix;
public Matrix textLineMatrix;
/// <summary>A map with all supported XObject handlers</summary>
public IDictionary<PdfName, IXObjectDoHandler> xobjectDoHandlers;
/// <summary>The font cache</summary>
public IDictionary<int, WeakReference> cachedFonts = new Dictionary<int, WeakReference>();
/// <summary>A stack containing marked content info.</summary>
public Stack<CanvasTag> markedContentStack = new Stack<CanvasTag>();
/// <summary>A memory limits handler.</summary>
public MemoryLimitsAwareHandler memoryLimitsHandler = null;
/// <summary>Page size in bytes.</summary>
public long pageSize = 0;
// Static page information and per-category counters. The single-argument constructor resets
// every one of these to 0.
// NOTE(review): the code that updates them during parsing is not visible in this part of the
// file; the meanings below are inferred from the names only.
// Current page number and page dimensions.
public static int PUBLIC_STATIC_INT_SAAN_CURRENT_PAGE_NUMBER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_WIDTH___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_HEIGHT___IN_CANVAS_PROCESSOR = 0;
// Text chunk counters (overall / per page).
public static long PUBLIC_STATIC_LONG_OVERALL_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Line segment counters (overall / per page).
public static long PUBLIC_STATIC_LONG_OVERALL_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Curve counters, "Mm" category (overall / per page).
public static long PUBLIC_STATIC_LONG_OVERALL_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Curve counters, "Ss" category (overall / per page).
public static long PUBLIC_STATIC_LONG_OVERALL_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Image counters (overall / per page).
public static long PUBLIC_STATIC_LONG_OVERALL_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
/// <summary>
/// Creates a new PDF Content Stream Processor that will send its output to the
/// designated render listener.
/// </summary>
/// <remarks>
/// Besides the regular set-up (listener, supported events, operators, XObject handlers,
/// <c>Reset()</c>) this constructor also touches process-wide diagnostic state: it forces the
/// sequence log on, writes one log line on entry and one after initialisation, replaces the
/// secondary report buffer with an empty one and zeroes all static page/counter fields.
/// </remarks>
/// <param name="eventListener">
/// the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// that will receive rendering notifications
/// </param>
public PdfCanvasProcessor(IEventListener eventListener)
{
    // ---- diagnostic log: entry -------------------------------------------------------
    // The event counter is bumped once unconditionally and once more when logging is on.
    PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
    PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG = true;
    if (PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
    {
        PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
        if (PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
        {
            // Recreate the log if somebody set it to null.
            PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
                = new StringBuilder();
        }

        // Both numeric columns are right-aligned in a 60 character wide field.
        string entryEventColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60);
        string entryPageColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60);
        PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
            .AppendLine(
                entryEventColumn
                + " "
                + entryPageColumn
                + " "
                + "entered here public PdfCanvasProcessor(IEventListener eventListener) eventListener = " + eventListener.GetType().ToString());
    }

    // ---- secondary report buffer: start empty for this processor ----------------------
    PUBLIC_STATIC_STRINGBUILDER___SAAN___kernel_pdf_canvas_parser_PdfCanvasProcessor = new StringBuilder();

    // ---- reset static page info and counters (done once per processor) ----------------
    PUBLIC_STATIC_INT_SAAN_CURRENT_PAGE_NUMBER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_WIDTH___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_HEIGHT___IN_CANVAS_PROCESSOR = 0;

    PUBLIC_STATIC_LONG_OVERALL_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;

    PUBLIC_STATIC_DOUBLE_PAGEWISE_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;

    // ---- regular processor initialisation ----------------------------------------------
    this.eventListener = eventListener;
    this.supportedEvents = eventListener.GetSupportedEvents();
    operators = new Dictionary<String, IContentOperator>();
    PopulateOperators();
    xobjectDoHandlers = new Dictionary<PdfName, IXObjectDoHandler>();
    PopulateXObjectDoHandlers();
    Reset();

    // ---- diagnostic log: initialisation finished --------------------------------------
    if (PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
    {
        PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
        if (PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
        {
            PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
                = new StringBuilder();
        }

        string doneEventColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60);
        string donePageColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60);
        PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
            .AppendLine(
                doneEventColumn
                + " "
                + donePageColumn
                + " "
                + "AFTER this.eventListener = eventListener; AFTER PopulateXObjectDoHandlers(); AFTER this.supportedEvents = eventListener.GetSupportedEvents(); AFTER PopulateOperators(); AFTER this.supportedEvents = eventListener.GetSupportedEvents();");
    }
}// public PdfCanvasProcessor(IEventListener eventListener)
/// <summary>
/// Creates a new PDF Content Stream Processor that will send its output to the
/// designated render listener.
/// </summary>
/// <remarks>
/// Creates a new PDF Content Stream Processor that will send its output to the
/// designated render listener.
/// Also allows registration of custom IContentOperators that can influence
/// how (and whether or not) the PDF instructions will be parsed.
/// </remarks>
/// <param name="eventListener">
/// the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// that will receive rendering notifications
/// </param>
/// <param name="additionalContentOperators">
/// an optional map of custom
/// <see cref="IContentOperator"/>
/// s for rendering instructions
/// </param>
public PdfCanvasProcessor(IEventListener eventListener, IDictionary<String, IContentOperator> additionalContentOperators )
: this(eventListener)
{
    // The map is documented as optional, so a null map means "no custom operators"
    // instead of a NullReferenceException from the foreach below.
    if (additionalContentOperators == null)
    {
        return;
    }
    foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators)
    {
        RegisterContentOperator(entry.Key, entry.Value);
        ////////////////////// TOO IMPORTANT CODE FOR SAAN CHECKING THE FLOWS OF PDF READING EVENTS OPERATIONS /////////
        if (Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
        {
            // Every logged step advances the shared event counter, whether or not the log buffer already exists.
            Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
            // Create the shared log buffer only when it is missing; an existing buffer is kept as-is
            // (never cleared) so that earlier log entries survive.
            if (Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
            {
                Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
                    = new StringBuilder();
            }
            // Log line layout: event counter and page number, each right-aligned in a 60 character column,
            // followed by the message. PadLeft produces the same padding as the hand-built space string
            // but does not throw when the number's text is longer than the column.
            Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
                .AppendLine
                (
                    Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60)
                    + " " +
                    Canvas.Parser.PdfCanvasProcessor.PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60)
                    + " " +
                    "INSIDE public PdfCanvasProcessor(IEventListener eventListener, IDictionary<String, IContentOperator> additionalContentOperators ) INSIDE foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators) entry.Key = " + entry.Key + " entry.Value = "+ entry.Value
                );
        }//if(PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
        ////////////////////// TOO IMPORTANT CODE FOR SAAN CHECKING THE FLOWS OF PDF READING EVENTS OPERATIONS /////////
    }// foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators)
}// public PdfCanvasProcessor(IEventListener eventListener, IDictionary<String, IContentOperator> additionalContentOperators )
/// <summary>Registers a Do handler that will be called when Do for the provided XObject subtype is encountered during content processing.
/// </summary>
/// <remarks>
/// Registers a Do handler that will be called when Do for the provided XObject subtype is encountered during content processing.
/// <br />
/// If you register a handler, it is a very good idea to pass the call on to the existing registered handler (returned by this call), otherwise you
/// may inadvertently change the public behavior of the processor.
/// </remarks>
/// <param name="xobjectSubType">the XObject subtype this handler will process, or PdfName.DEFAULT for a catch-all handler
/// </param>
/// <param name="handler">the handler that will receive notification when the Do operator for the specified subtype is encountered
/// </param>
/// <returns>the existing registered handler, if any</returns>
public virtual IXObjectDoHandler RegisterXObjectDoHandler(PdfName xobjectSubType, IXObjectDoHandler handler )
{
    // Store the new handler under the subtype and return whatever Put reports back,
    // so a caller can chain to the handler it has just replaced.
    IXObjectDoHandler replacedHandler = xobjectDoHandlers.Put(xobjectSubType, handler);
    return replacedHandler;
}
/// <summary>Registers a content operator that will be called when the specified operator string is encountered during content processing.
/// </summary>
/// <remarks>
/// Registers a content operator that will be called when the specified operator string is encountered during content processing.
/// <br />
/// If you register an operator, it is a very good idea to pass the call on to the existing registered operator (returned by this call), otherwise you
/// may inadvertently change the public behavior of the processor.
/// </remarks>
/// <param name="operatorString">the operator id, or DEFAULT_OPERATOR for a catch-all operator</param>
/// <param name="operator">the operator that will receive notification when the operator is encountered</param>
/// <returns>the existing registered operator, if any</returns>
public virtual IContentOperator RegisterContentOperator(String operatorString, IContentOperator @operator)
{
    // Store the new operator under its operator string and return whatever Put reports back,
    // so a caller can chain to the operator it has just replaced.
    IContentOperator replacedOperator = operators.Put(operatorString, @operator);
    return replacedOperator;
}
/// <summary>
/// Gets the
/// <see cref="System.Collections.Generic.ICollection{T}"/>
/// containing all the registered operator strings.
/// </summary>
/// <returns>
/// the
/// <see cref="System.Collections.Generic.ICollection{T}"/>
/// containing all the registered operator strings.
/// </returns>
public virtual ICollection<String> GetRegisteredOperatorStrings()
{
    // Copy the keys into a new list, so the caller receives a snapshot
    // and not a live view of the operator map.
    List<String> registeredStrings = new List<String>();
    registeredStrings.AddRange(operators.Keys);
    return registeredStrings;
}
/// <summary>Resets the graphics state stack, matrices and resources.</summary>
public virtual void Reset()
{
    // Forget the memory-limit bookkeeping accumulated by earlier ProcessContent calls.
    pageSize = 0;
    memoryLimitsHandler = null;

    // No text object is open after a reset, so neither text matrix is defined.
    textLineMatrix = null;
    textMatrix = null;

    // Discard any half-built path and any pending clip request.
    currentPath = new Path();
    isClip = false;

    // Start with an empty resources stack.
    resourcesStack = new List<PdfResources>();

    // The graphics state stack always holds at least one (default) state.
    gsStack.Clear();
    gsStack.Push(new ParserGraphicsState());
}
/// <summary>
/// Gets the current
/// <see cref="ParserGraphicsState"/>
/// </summary>
/// <returns>
/// the current
/// <see cref="ParserGraphicsState"/>
/// </returns>
public virtual ParserGraphicsState GetGraphicsState()
{
    // The state on top of the stack is the one in effect; Peek inspects it without removing it.
    ParserGraphicsState stateInEffect = gsStack.Peek();
    return stateInEffect;
}
/// <summary>Processes PDF syntax.</summary>
/// <remarks>
/// Processes PDF syntax.
/// <b>Note:</b> If you re-use a given
/// <see cref="PdfCanvasProcessor"/>
/// , you must call
/// <see cref="Reset()"/>
/// </remarks>
/// <param name="contentBytes">the bytes of a content stream</param>
/// <param name="resources">the resources of the content stream. Must not be null.</param>
public virtual void ProcessContent(byte[] contentBytes, PdfResources resources)
{
    if (resources == null)
    {
        throw new PdfException(KernelExceptionMessageConstant.RESOURCES_CANNOT_BE_NULL);
    }
    // When a memory-limits handler is set, add this stream's size to the running
    // total and let the handler check that total.
    if (memoryLimitsHandler != null)
    {
        pageSize += (long)contentBytes.Length;
        memoryLimitsHandler.CheckIfPageSizeExceedsTheLimit(this.pageSize);
    }
    // The resources become the "current" ones (see GetResources) for the duration of this call.
    this.resourcesStack.Add(resources);
    try
    {
        PdfTokenizer tokeniser = new PdfTokenizer(
            new RandomAccessFileOrArray(
                new RandomAccessSourceFactory().CreateSource(contentBytes)));
        PdfCanvasParser ps = new PdfCanvasParser(tokeniser, resources);
        IList<PdfObject> operands = new List<PdfObject>();
        try
        {
            // Each Parse call fills 'operands' with one instruction; the operator is its last element.
            // An empty result ends the loop.
            while (ps.Parse(operands).Count > 0)
            {
                PdfLiteral @operator = (PdfLiteral)operands[operands.Count - 1];
                InvokeOperator(@operator, operands);
            }
        }
        catch (System.IO.IOException e)
        {
            throw new PdfException(KernelExceptionMessageConstant.CANNOT_PARSE_CONTENT_STREAM, e);
        }
    }
    finally
    {
        // Always undo the Add above, even when parsing or an operator throws, so a failed
        // stream does not leave its resources on top of the stack for later calls.
        this.resourcesStack.JRemoveAt(resourcesStack.Count - 1);
    }
}// public virtual void ProcessContent(byte[] contentBytes, PdfResources resources)
/// <summary>Processes PDF syntax.</summary>
/// <remarks>
/// Processes PDF syntax.
/// <strong>Note:</strong> If you re-use a given
/// <see cref="PdfCanvasProcessor"/>
/// , you must call
/// <see cref="Reset()"/>
/// </remarks>
/// <param name="page">the page to process</param>
public virtual void ProcessPageContent(PdfPage page)
{
    // Use the memory-limits handler of the document that owns the page.
    this.memoryLimitsHandler = page.GetDocument().GetMemoryLimitsAwareHandler();

    // Set the initial clipping path from the page and announce it before any content is interpreted.
    InitClippingPath(page);
    ParserGraphicsState initialState = GetGraphicsState();
    ClippingPathInfo initialClip = new ClippingPathInfo(initialState, initialState.GetClippingPath(), initialState.GetCtm());
    EventOccurred(initialClip, EventType.CLIP_PATH_CHANGED);

    // Interpret the page's content bytes against the page's own resources.
    byte[] pageBytes = page.GetContentBytes();
    PdfResources pageResources = page.GetResources();
    ProcessContent(pageBytes, pageResources);
}
/// <summary>
/// Accessor method for the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// object maintained in this class.
/// </summary>
/// <remarks>
/// Accessor method for the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// object maintained in this class.
/// Necessary for implementing custom ContentOperator implementations.
/// </remarks>
/// <returns>the renderListener</returns>
public virtual IEventListener GetEventListener()
{
    // Returns the listener stored in this processor's eventListener field.
    IEventListener currentListener = this.eventListener;
    return currentListener;
}
/// <summary>Loads all the supported graphics and text state operators in a map.</summary>
public virtual void PopulateOperators()
{
    // A null supportedEvents set is treated throughout as "every event type is wanted".
    bool everythingWanted = supportedEvents == null;
    bool textWanted = everythingWanted || supportedEvents.Contains(EventType.RENDER_TEXT);
    bool pathWanted = everythingWanted || supportedEvents.Contains(EventType.RENDER_PATH);
    bool clipWanted = everythingWanted || supportedEvents.Contains(EventType.CLIP_PATH_CHANGED);
    bool imageWanted = everythingWanted || supportedEvents.Contains(EventType.RENDER_IMAGE);
    bool textBlockWanted = textWanted
        || supportedEvents.Contains(EventType.BEGIN_TEXT)
        || supportedEvents.Contains(EventType.END_TEXT);

    // --- Operators that are always registered: catch-all, graphics state stack, CTM, XObjects, marked content.
    RegisterContentOperator(DEFAULT_OPERATOR, new PdfCanvasProcessor.IgnoreOperator());
    RegisterContentOperator("q", new PdfCanvasProcessor.PushGraphicsStateOperator());
    RegisterContentOperator("Q", new PdfCanvasProcessor.PopGraphicsStateOperator());
    RegisterContentOperator("cm", new PdfCanvasProcessor.ModifyCurrentTransformationMatrixOperator());
    RegisterContentOperator("Do", new PdfCanvasProcessor.DoOperator());
    RegisterContentOperator("BMC", new PdfCanvasProcessor.BeginMarkedContentOperator());
    RegisterContentOperator("BDC", new PdfCanvasProcessor.BeginMarkedContentDictionaryOperator());
    RegisterContentOperator("EMC", new PdfCanvasProcessor.EndMarkedContentOperator());

    // --- Colour and extended graphics state: needed by text, path and clip events alike.
    if (textWanted || pathWanted || clipWanted)
    {
        RegisterContentOperator("g", new PdfCanvasProcessor.SetGrayFillOperator());
        RegisterContentOperator("G", new PdfCanvasProcessor.SetGrayStrokeOperator());
        RegisterContentOperator("rg", new PdfCanvasProcessor.SetRGBFillOperator());
        RegisterContentOperator("RG", new PdfCanvasProcessor.SetRGBStrokeOperator());
        RegisterContentOperator("k", new PdfCanvasProcessor.SetCMYKFillOperator());
        RegisterContentOperator("K", new PdfCanvasProcessor.SetCMYKStrokeOperator());
        RegisterContentOperator("cs", new PdfCanvasProcessor.SetColorSpaceFillOperator());
        RegisterContentOperator("CS", new PdfCanvasProcessor.SetColorSpaceStrokeOperator());
        // "sc"/"scn" share one operator type, as do "SC"/"SCN".
        RegisterContentOperator("sc", new PdfCanvasProcessor.SetColorFillOperator());
        RegisterContentOperator("SC", new PdfCanvasProcessor.SetColorStrokeOperator());
        RegisterContentOperator("scn", new PdfCanvasProcessor.SetColorFillOperator());
        RegisterContentOperator("SCN", new PdfCanvasProcessor.SetColorStrokeOperator());
        RegisterContentOperator("gs", new PdfCanvasProcessor.ProcessGraphicsStateResourceOperator());
    }

    // --- Inline images.
    if (imageWanted)
    {
        RegisterContentOperator("EI", new PdfCanvasProcessor.EndImageOperator());
    }

    // --- Text object delimiters.
    if (textBlockWanted)
    {
        RegisterContentOperator("BT", new PdfCanvasProcessor.BeginTextOperator());
        RegisterContentOperator("ET", new PdfCanvasProcessor.EndTextOperator());
    }

    // --- Text state, positioning and showing.
    if (textWanted)
    {
        // Some operators are composed from others (TD, T*, ' and "), so the shared
        // instances are created first and then handed to the composite operators.
        PdfCanvasProcessor.SetTextCharacterSpacingOperator charSpacingOp = new PdfCanvasProcessor.SetTextCharacterSpacingOperator();
        PdfCanvasProcessor.SetTextWordSpacingOperator wordSpacingOp = new PdfCanvasProcessor.SetTextWordSpacingOperator();
        PdfCanvasProcessor.SetTextLeadingOperator leadingOp = new PdfCanvasProcessor.SetTextLeadingOperator();
        PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartOp = new PdfCanvasProcessor.TextMoveStartNextLineOperator();
        PdfCanvasProcessor.TextMoveNextLineOperator nextLineOp = new PdfCanvasProcessor.TextMoveNextLineOperator(moveStartOp);
        PdfCanvasProcessor.ShowTextOperator showTextOp = new PdfCanvasProcessor.ShowTextOperator();
        PdfCanvasProcessor.MoveNextLineAndShowTextOperator nextLineShowOp = new PdfCanvasProcessor.MoveNextLineAndShowTextOperator(nextLineOp, showTextOp);

        RegisterContentOperator("Tc", charSpacingOp);
        RegisterContentOperator("Tw", wordSpacingOp);
        RegisterContentOperator("Tz", new PdfCanvasProcessor.SetTextHorizontalScalingOperator());
        RegisterContentOperator("TL", leadingOp);
        RegisterContentOperator("Tf", new PdfCanvasProcessor.SetTextFontOperator());
        RegisterContentOperator("Tr", new PdfCanvasProcessor.SetTextRenderModeOperator());
        RegisterContentOperator("Ts", new PdfCanvasProcessor.SetTextRiseOperator());
        RegisterContentOperator("Td", moveStartOp);
        RegisterContentOperator("TD", new PdfCanvasProcessor.TextMoveStartNextLineWithLeadingOperator(moveStartOp, leadingOp));
        RegisterContentOperator("Tm", new PdfCanvasProcessor.TextSetTextMatrixOperator());
        RegisterContentOperator("T*", nextLineOp);
        RegisterContentOperator("Tj", showTextOp);
        RegisterContentOperator("'", nextLineShowOp);
        RegisterContentOperator("\"", new PdfCanvasProcessor.MoveNextLineAndShowTextWithSpacingOperator(wordSpacingOp, charSpacingOp, nextLineShowOp));
        RegisterContentOperator("TJ", new PdfCanvasProcessor.ShowTextArrayOperator());
    }

    // --- Line style, path construction, path painting and clipping.
    if (clipWanted || pathWanted)
    {
        RegisterContentOperator("w", new PdfCanvasProcessor.SetLineWidthOperator());
        RegisterContentOperator("J", new PdfCanvasProcessor.SetLineCapOperator());
        RegisterContentOperator("j", new PdfCanvasProcessor.SetLineJoinOperator());
        RegisterContentOperator("M", new PdfCanvasProcessor.SetMiterLimitOperator());
        RegisterContentOperator("d", new PdfCanvasProcessor.SetLineDashPatternOperator());

        RegisterContentOperator("m", new PdfCanvasProcessor.MoveToOperator());
        RegisterContentOperator("l", new PdfCanvasProcessor.LineToOperator());
        RegisterContentOperator("c", new PdfCanvasProcessor.CurveOperator());
        RegisterContentOperator("v", new PdfCanvasProcessor.CurveFirstPointDuplicatedOperator());
        RegisterContentOperator("y", new PdfCanvasProcessor.CurveFourhPointDuplicatedOperator());
        RegisterContentOperator("h", new PdfCanvasProcessor.CloseSubpathOperator());
        RegisterContentOperator("re", new PdfCanvasProcessor.RectangleOperator());

        // PaintPathOperator arguments: (operation, filling rule, third bool flag).
        // -1 is passed as the rule wherever nothing is filled; the bool is true for the
        // lowercase variants "s", "b" and "b*".
        int fillAndStroke = PathRenderInfo.FILL | PathRenderInfo.STROKE;
        RegisterContentOperator("S", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.STROKE, -1, false));
        RegisterContentOperator("s", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.STROKE, -1, true));
        RegisterContentOperator("f", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, PdfCanvasConstants.FillingRule.NONZERO_WINDING, false));
        RegisterContentOperator("F", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, PdfCanvasConstants.FillingRule.NONZERO_WINDING, false));
        RegisterContentOperator("f*", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, PdfCanvasConstants.FillingRule.EVEN_ODD, false));
        RegisterContentOperator("B", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, PdfCanvasConstants.FillingRule.NONZERO_WINDING, false));
        RegisterContentOperator("B*", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, PdfCanvasConstants.FillingRule.EVEN_ODD, false));
        RegisterContentOperator("b", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, PdfCanvasConstants.FillingRule.NONZERO_WINDING, true));
        RegisterContentOperator("b*", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, PdfCanvasConstants.FillingRule.EVEN_ODD, true));
        RegisterContentOperator("n", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.NO_OP, -1, false));

        RegisterContentOperator("W", new PdfCanvasProcessor.ClipPathOperator(PdfCanvasConstants.FillingRule.NONZERO_WINDING));
        RegisterContentOperator("W*", new PdfCanvasProcessor.ClipPathOperator(PdfCanvasConstants.FillingRule.EVEN_ODD));
    }
}
/// <summary>Displays the current path.</summary>
/// <param name="operation">
/// One of the possible combinations of
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.STROKE"/>
/// and
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.FILL"/>
/// values or
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.NO_OP"/>
/// </param>
/// <param name="rule">
/// Either
/// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
/// or
/// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>
/// In case it isn't applicable pass any <c>int</c> value (the built-in operators pass <c>-1</c>).
/// </param>
public virtual void PaintPath(int operation, int rule)
{
    ParserGraphicsState state = GetGraphicsState();

    // Report the finished path first. The render info is built with the clip flag and
    // clipping rule as they stand before the flag is cleared below.
    EventOccurred(
        new PathRenderInfo(this.markedContentStack, state, currentPath, operation, rule, isClip, clippingRule),
        EventType.RENDER_PATH);

    if (isClip)
    {
        // A pending clip request is consumed here: apply the current path to the
        // graphics state's clipping path and announce the change.
        isClip = false;
        state.Clip(currentPath, clippingRule);
        ClippingPathInfo updatedClip = new ClippingPathInfo(state, state.GetClippingPath(), state.GetCtm());
        EventOccurred(updatedClip, EventType.CLIP_PATH_CHANGED);
    }

    // Painting ends the current path; subsequent construction operators start from an empty one.
    currentPath = new Path();
}
/// <summary>Invokes an operator.</summary>
/// <param name="operator">the PDF Syntax of the operator</param>
/// <param name="operands">a list with operands</param>
public virtual void InvokeOperator(PdfLiteral @operator, IList<PdfObject> operands)
{
    // Operator strings without a registered handler fall back to the one stored under DEFAULT_OPERATOR.
    IContentOperator handler = operators.Get(@operator.ToString()) ?? operators.Get(DEFAULT_OPERATOR);
    handler.Invoke(this, @operator, operands);
}
/// <summary>Looks up an XObject stream by name in the XObject resource dictionary of the current resources.</summary>
/// <param name="xobjectName">the resource name of the XObject</param>
/// <returns>the result of <c>GetAsStream</c> on the XObject resource dictionary for that name</returns>
public virtual PdfStream GetXObjectStream(PdfName xobjectName)
{
    return GetResources().GetResource(PdfName.XObject).GetAsStream(xobjectName);
}
/// <summary>Gets the resources most recently added to the resources stack (its last element).</summary>
/// <returns>the last element of the resources stack</returns>
public virtual PdfResources GetResources()
{
    int topIndex = resourcesStack.Count - 1;
    return resourcesStack[topIndex];
}
/// <summary>Registers the built-in Do handlers: a catch-all, one for form XObjects and, when image events are wanted, one for image XObjects.</summary>
public virtual void PopulateXObjectDoHandlers()
{
    RegisterXObjectDoHandler(PdfName.Default, new PdfCanvasProcessor.IgnoreXObjectDoHandler());
    RegisterXObjectDoHandler(PdfName.Form, new PdfCanvasProcessor.FormXObjectDoHandler());

    // A null supportedEvents set counts as "everything is wanted".
    bool imagesWanted = supportedEvents == null || supportedEvents.Contains(EventType.RENDER_IMAGE);
    if (imagesWanted)
    {
        RegisterXObjectDoHandler(PdfName.Image, new PdfCanvasProcessor.ImageXObjectDoHandler());
    }
}
/// <summary>
/// Creates a
/// <see cref="DETA7.Kernel.Font.PdfFont"/>
/// object by a font dictionary.
/// </summary>
/// <remarks>
/// Creates a
/// <see cref="DETA7.Kernel.Font.PdfFont"/>
/// object by a font dictionary. The font may have been cached in case
/// it is an indirect object.
/// </remarks>
/// <param name="fontDict">
/// the
/// <see cref="DETA7.Kernel.Pdf.PdfDictionary">font dictionary</see>
/// to create the font from
/// </param>
/// <returns>the created font</returns>
public virtual PdfFont GetFont(PdfDictionary fontDict)
{
    // A dictionary without an indirect reference has no object number to key the cache on,
    // so a font is created for it on every call.
    if (fontDict.GetIndirectReference() == null)
    {
        return PdfFontFactory.CreateFont(fontDict);
    }

    int objectNumber = fontDict.GetIndirectReference().GetObjNumber();

    // The cache stores weak references, so an entry may exist while its target is already null.
    WeakReference cachedEntry = cachedFonts.Get(objectNumber);
    PdfFont cachedFont = null;
    if (cachedEntry != null)
    {
        cachedFont = (PdfFont)cachedEntry.Target;
    }
    if (cachedFont != null)
    {
        return cachedFont;
    }

    // Nothing usable in the cache: create the font and cache it under its object number.
    PdfFont createdFont = PdfFontFactory.CreateFont(fontDict);
    cachedFonts.Put(objectNumber, new WeakReference(createdFont));
    return createdFont;
}
/// <summary>Add to the marked content stack</summary>
/// <param name="tag">the tag of the marked content</param>
/// <param name="dict">the PdfDictionary associated with the marked content</param>
public virtual void BeginMarkedContent(PdfName tag, PdfDictionary dict)
{
    // Wrap the tag name together with its property dictionary, then push it on the stack.
    CanvasTag openedTag = new CanvasTag(tag).SetProperties(dict);
    this.markedContentStack.Push(openedTag);
}
/// <summary>Remove the latest marked content from the stack.</summary>
/// <remarks>Remove the latest marked content from the stack. Keeps track of the BMC, BDC and EMC operators.</remarks>
public virtual void EndMarkedContent()
{
    // Pop the most recently pushed tag; the popped tag itself is not used.
    this.markedContentStack.Pop();
}
/// <summary>Used to trigger beginTextBlock on the renderListener</summary>
public void BeginText()
{
    // BEGIN_TEXT carries no event data, so null is passed as the payload.
    IEventData noPayload = null;
    EventOccurred(noPayload, EventType.BEGIN_TEXT);
}
/// <summary>Used to trigger endTextBlock on the renderListener</summary>
public void EndText()
{
    // END_TEXT carries no event data, so null is passed as the payload.
    IEventData noPayload = null;
    EventOccurred(noPayload, EventType.END_TEXT);
}
/// <summary>This is a proxy to pass only those events to the event listener which are supported by it.</summary>
/// <param name="data">event data</param>
/// <param name="type">event type</param>
public virtual void EventOccurred(IEventData data, EventType type)
{
    // A null supportedEvents set means the listener receives every event type.
    bool listenerWantsEvent = supportedEvents == null || supportedEvents.Contains(type);
    if (listenerWantsEvent)
    {
        eventListener.EventOccurred(data, type);
    }

    // ReleaseGraphicsState is called on render infos whether or not the event was delivered.
    AbstractRenderInfo renderInfo = data as AbstractRenderInfo;
    if (renderInfo != null)
    {
        renderInfo.ReleaseGraphicsState();
    }
}
/// <summary>Displays text.</summary>
/// <param name="string">the text to display</param>
public void DisplayPdfString(PdfString @string)
{
    // The render info is built with the text matrix as it is before this string advances it.
    TextRenderInfo textInfo = new TextRenderInfo(@string, GetGraphicsState(), textMatrix, markedContentStack);

    // Shift the text matrix horizontally by the string's unscaled width.
    Matrix advance = new Matrix(textInfo.GetUnscaledWidth(), 0);
    textMatrix = advance.Multiply(textMatrix);

    EventOccurred(textInfo, EventType.RENDER_TEXT);
}
/// <summary>Displays an XObject using the registered handler for this XObject's subtype</summary>
/// <param name="resourceName">the name of the XObject to retrieve from the resource dictionary</param>
public void DisplayXObject(PdfName resourceName)
{
    PdfStream stream = GetXObjectStream(resourceName);

    // Choose the handler by the stream's Subtype; subtypes without a registered handler
    // go to the one registered under PdfName.Default.
    IXObjectDoHandler doHandler =
        xobjectDoHandlers.Get(stream.GetAsName(PdfName.Subtype))
        ?? xobjectDoHandlers.Get(PdfName.Default);

    doHandler.HandleXObject(this, this.markedContentStack, stream, resourceName);
}
/// <summary>Builds an <c>ImageRenderInfo</c> for the given image stream and raises a RENDER_IMAGE event with it.</summary>
/// <param name="canvasTagHierarchy">the marked content tag stack passed on to the render info</param>
/// <param name="imageStream">the image stream passed on to the render info</param>
/// <param name="resourceName">the resource name passed on to the render info</param>
/// <param name="isInline">passed on to the render info unchanged</param>
public void DisplayImage(Stack<CanvasTag> canvasTagHierarchy, PdfStream imageStream, PdfName resourceName , bool isInline)
{
    // The ColorSpace resource dictionary of the current resources is passed to the render info.
    PdfDictionary colorSpaces = GetResources().GetResource(PdfName.ColorSpace);
    ParserGraphicsState state = GetGraphicsState();

    ImageRenderInfo imageInfo = new ImageRenderInfo(
        canvasTagHierarchy, state, state.GetCtm(), imageStream, resourceName, colorSpaces, isInline);
    EventOccurred(imageInfo, EventType.RENDER_IMAGE);
}
/// <summary>Adjusts the text matrix for the specified adjustment value (see TJ operator in the PDF spec for information)
/// </summary>
/// <param name="tj">the text adjustment</param>
public void ApplyTextAdjust(float tj)
{
    // The adjustment is negated before conversion, then multiplied by the font size
    // and by the horizontal scaling divided by 100.
    float glyphSpaceShift = FontProgram.ConvertTextSpaceToGlyphSpace(-tj);
    float fontSize = GetGraphicsState().GetFontSize();
    float scalingFactor = GetGraphicsState().GetHorizontalScaling() / 100F;
    float adjustBy = glyphSpaceShift * fontSize * scalingFactor;

    // Apply the result as a horizontal translation in front of the current text matrix.
    textMatrix = new Matrix(adjustBy, 0).Multiply(textMatrix);
}
/// <summary>Sets the clipping path of the current graphics state to a rectangle built from the page's crop box.</summary>
/// <param name="page">the page whose crop box is used</param>
public void InitClippingPath(PdfPage page)
{
    Path cropBoxOutline = new Path();
    cropBoxOutline.Rectangle(page.GetCropBox());

    ParserGraphicsState state = GetGraphicsState();
    state.SetClippingPath(cropBoxOutline);
}
/// <summary>A handler that implements operator (unregistered).</summary>
public class IgnoreOperator : IContentOperator
{
    /// <summary>Deliberately does nothing: the operator and its operands are discarded.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // ignore the operator
    }
}
/// <summary>A handler that implements operator (TJ).</summary>
/// <remarks>A handler that implements operator (TJ). For more information see Table 51 ISO-32000-1</remarks>
public class ShowTextArrayOperator : IContentOperator
{
    /// <summary>
    /// Walks the array operand in order: string elements are displayed, every other
    /// element is cast to a number and applied as a text adjustment.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfArray pieces = (PdfArray)operands[0];
        foreach (PdfObject piece in pieces)
        {
            PdfString text = piece as PdfString;
            if (text != null)
            {
                processor.DisplayPdfString(text);
                continue;
            }
            // Not a string: the element is cast to PdfNumber and used as the adjustment.
            processor.ApplyTextAdjust(((PdfNumber)piece).FloatValue());
        }
    }
}
/// <summary>A handler that implements operator (").</summary>
/// <remarks>A handler that implements operator ("). For more information see Table 51 ISO-32000-1</remarks>
public class MoveNextLineAndShowTextWithSpacingOperator : IContentOperator
{
    public PdfCanvasProcessor.SetTextWordSpacingOperator setTextWordSpacing;
    public PdfCanvasProcessor.SetTextCharacterSpacingOperator setTextCharacterSpacing;
    public PdfCanvasProcessor.MoveNextLineAndShowTextOperator moveNextLineAndShowText;

    /// <summary>Create new instance of this handler.</summary>
    /// <param name="setTextWordSpacing">the handler for Tw operator</param>
    /// <param name="setTextCharacterSpacing">the handler for Tc operator</param>
    /// <param name="moveNextLineAndShowText">the handler for ' operator</param>
    public MoveNextLineAndShowTextWithSpacingOperator(
        PdfCanvasProcessor.SetTextWordSpacingOperator setTextWordSpacing,
        PdfCanvasProcessor.SetTextCharacterSpacingOperator setTextCharacterSpacing,
        PdfCanvasProcessor.MoveNextLineAndShowTextOperator moveNextLineAndShowText)
    {
        this.setTextWordSpacing = setTextWordSpacing;
        this.setTextCharacterSpacing = setTextCharacterSpacing;
        this.moveNextLineAndShowText = moveNextLineAndShowText;
    }

    /// <summary>
    /// Delegates, in this order, to the Tw handler (operand 0), the Tc handler (operand 1)
    /// and the ' handler (operand 2), each with a one-element operand list.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // All three operands are cast up front, before any delegate runs.
        PdfNumber wordSpacing = (PdfNumber)operands[0];
        PdfNumber charSpacing = (PdfNumber)operands[1];
        PdfString text = (PdfString)operands[2];

        // The delegates are invoked with a null operator literal.
        setTextWordSpacing.Invoke(processor, null, new List<PdfObject>(1) { wordSpacing });
        setTextCharacterSpacing.Invoke(processor, null, new List<PdfObject>(1) { charSpacing });
        moveNextLineAndShowText.Invoke(processor, null, new List<PdfObject>(1) { text });
    }
}
/// <summary>A handler that implements operator (').</summary>
/// <remarks>A handler that implements operator ('). For more information see Table 51 ISO-32000-1</remarks>
public class MoveNextLineAndShowTextOperator : IContentOperator
{
    public PdfCanvasProcessor.TextMoveNextLineOperator textMoveNextLine;
    public PdfCanvasProcessor.ShowTextOperator showText;

    /// <summary>Creates the new instance of this handler</summary>
    /// <param name="textMoveNextLine">the handler for T* operator</param>
    /// <param name="showText">the handler for Tj operator</param>
    public MoveNextLineAndShowTextOperator(
        PdfCanvasProcessor.TextMoveNextLineOperator textMoveNextLine,
        PdfCanvasProcessor.ShowTextOperator showText)
    {
        this.textMoveNextLine = textMoveNextLine;
        this.showText = showText;
    }

    /// <summary>Invokes the T* handler with no operands, then the Tj handler with this operator's own operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        IList<PdfObject> noOperands = new List<PdfObject>(0);
        textMoveNextLine.Invoke(processor, null, noOperands);
        showText.Invoke(processor, null, operands);
    }
}
/// <summary>A handler that implements operator (Tj).</summary>
/// <remarks>A handler that implements operator (Tj). For more information see Table 51 ISO-32000-1</remarks>
public class ShowTextOperator : IContentOperator
{
    /// <summary>Displays the string found in the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.DisplayPdfString((PdfString)operands[0]);
    }
}
/// <summary>A handler that implements operator (T*).</summary>
/// <remarks>A handler that implements operator (T*). For more information see Table 51 ISO-32000-1</remarks>
public class TextMoveNextLineOperator : IContentOperator
{
    public PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine;

    /// <summary>Creates the handler.</summary>
    /// <param name="moveStartNextLine">the Td handler this operator delegates to</param>
    public TextMoveNextLineOperator(PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine)
    {
        this.moveStartNextLine = moveStartNextLine;
    }

    /// <summary>Delegates to the Td handler with the operand pair (0, -leading), the leading read from the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfNumber horizontalShift = new PdfNumber(0);
        PdfNumber verticalShift = new PdfNumber(-processor.GetGraphicsState().GetLeading());
        moveStartNextLine.Invoke(processor, null, new List<PdfObject>(2) { horizontalShift, verticalShift });
    }
}
/// <summary>A handler that implements operator (Tm).</summary>
/// <remarks>A handler that implements operator (Tm). For more information see Table 51 ISO-32000-1</remarks>
public class TextSetTextMatrixOperator : IContentOperator
{
    /// <summary>Builds a matrix from the six numeric operands and stores it as both the text line matrix and the text matrix.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Read the six operands, in order, as floats.
        float[] entries = new float[6];
        for (int i = 0; i < entries.Length; i++)
        {
            entries[i] = ((PdfNumber)operands[i]).FloatValue();
        }

        // Both matrices refer to the same new Matrix instance.
        Matrix lineMatrix = new Matrix(entries[0], entries[1], entries[2], entries[3], entries[4], entries[5]);
        processor.textLineMatrix = lineMatrix;
        processor.textMatrix = lineMatrix;
    }
}
/// <summary>Content-stream handler for the move-and-set-leading operator (TD).</summary>
/// <remarks>
/// First invokes the TL handler with the negated second operand, then invokes the Td handler with
/// the original operands. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class TextMoveStartNextLineWithLeadingOperator : IContentOperator
{
    /// <summary>Td handler invoked after the leading has been set.</summary>
    public PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine;

    /// <summary>TL handler used to store the new leading.</summary>
    public PdfCanvasProcessor.SetTextLeadingOperator setTextLeading;

    /// <summary>Creates the composite handler from the two handlers it delegates to.</summary>
    /// <param name="moveStartNextLine">Td handler</param>
    /// <param name="setTextLeading">TL handler</param>
    public TextMoveStartNextLineWithLeadingOperator(
        PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine,
        PdfCanvasProcessor.SetTextLeadingOperator setTextLeading)
    {
        this.setTextLeading = setTextLeading;
        this.moveStartNextLine = moveStartNextLine;
    }

    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float verticalOffset = ((PdfNumber)operands[1]).FloatValue();
        // The leading passed to TL is the negated vertical offset.
        IList<PdfObject> leadingOperands = new List<PdfObject>(1) { new PdfNumber(-verticalOffset) };
        setTextLeading.Invoke(processor, null, leadingOperands);
        moveStartNextLine.Invoke(processor, null, operands);
    }
}
/// <summary>Content-stream handler for the move-text-position operator (Td).</summary>
/// <remarks>
/// Multiplies a translation built from the two operands with the current text line matrix and
/// stores the product as both the text matrix and the text line matrix.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class TextMoveStartNextLineOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float offsetX = ((PdfNumber)operands[0]).FloatValue();
        float offsetY = ((PdfNumber)operands[1]).FloatValue();
        Matrix translation = new Matrix(offsetX, offsetY);
        processor.textMatrix = translation.Multiply(processor.textLineMatrix);
        processor.textLineMatrix = processor.textMatrix;
    }
}
/// <summary>Content-stream handler for the set-font operator (Tf).</summary>
/// <remarks>
/// Resolves the font named by the first operand through the Font resource dictionary and stores it,
/// together with the size from the second operand, in the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextFontOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName resourceKey = (PdfName)operands[0];
        float fontSize = ((PdfNumber)operands[1]).FloatValue();

        // NOTE(review): no null check here - a missing Font resource dictionary fails on the next line.
        PdfDictionary fontResources = processor.GetResources().GetResource(PdfName.Font);
        PdfDictionary fontDictionary = fontResources.GetAsDictionary(resourceKey);
        PdfFont resolvedFont = processor.GetFont(fontDictionary);

        processor.GetGraphicsState().SetFont(resolvedFont);
        processor.GetGraphicsState().SetFontSize(fontSize);
    }
}
/// <summary>Content-stream handler for the text-rendering-mode operator (Tr).</summary>
/// <remarks>
/// Stores the integer value of the first operand as the text rendering mode of the current
/// graphics state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextRenderModeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        int renderMode = ((PdfNumber)operands[0]).IntValue();
        processor.GetGraphicsState().SetTextRenderingMode(renderMode);
    }
}
/// <summary>Content-stream handler for the text-rise operator (Ts).</summary>
/// <remarks>
/// Stores the float value of the first operand as the text rise of the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextRiseOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float textRise = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetTextRise(textRise);
    }
}
/// <summary>Content-stream handler for the text-leading operator (TL).</summary>
/// <remarks>
/// Stores the float value of the first operand as the leading of the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextLeadingOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float newLeading = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetLeading(newLeading);
    }
}
/// <summary>Content-stream handler for the horizontal-scaling operator (Tz).</summary>
/// <remarks>
/// Stores the float value of the first operand as the horizontal scaling of the current graphics
/// state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextHorizontalScalingOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float horizontalScale = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetHorizontalScaling(horizontalScale);
    }
}
/// <summary>Content-stream handler for the character-spacing operator (Tc).</summary>
/// <remarks>
/// Stores the float value of the first operand as the character spacing of the current graphics
/// state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextCharacterSpacingOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float characterSpacing = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetCharSpacing(characterSpacing);
    }
}
/// <summary>Content-stream handler for the word-spacing operator (Tw).</summary>
/// <remarks>
/// Stores the float value of the first operand as the word spacing of the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetTextWordSpacingOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float wordSpacing = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetWordSpacing(wordSpacing);
    }
}
/// <summary>Content-stream handler for the graphics-state-dictionary operator (gs).</summary>
/// <remarks>
/// Looks the named entry up in the ExtGState resources (as a dictionary, falling back to a stream),
/// applies an optional Font entry to the current graphics state and then applies the remaining
/// entries through <c>UpdateFromExtGState</c>. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class ProcessGraphicsStateResourceOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName dictionaryName = (PdfName)operands[0];

        PdfDictionary extGStateResources = processor.GetResources().GetResource(PdfName.ExtGState);
        if (extGStateResources == null)
        {
            throw new PdfException(KernelExceptionMessageConstant.RESOURCES_DO_NOT_CONTAIN_EXTGSTATE_ENTRY_UNABLE_TO_PROCESS_THIS_OPERATOR).SetMessageParams(@operator);
        }

        // Try a plain dictionary first, then a stream stored under the same name.
        PdfDictionary stateDictionary = extGStateResources.GetAsDictionary(dictionaryName);
        if (stateDictionary == null)
        {
            stateDictionary = extGStateResources.GetAsStream(dictionaryName);
        }
        if (stateDictionary == null)
        {
            throw new PdfException(KernelExceptionMessageConstant.UNKNOWN_GRAPHICS_STATE_DICTIONARY).SetMessageParams(
                dictionaryName);
        }

        // The Font entry is a two-element array: font dictionary at index 0, size at index 1.
        PdfArray fontEntry = stateDictionary.GetAsArray(PdfName.Font);
        if (fontEntry != null)
        {
            PdfFont font = processor.GetFont(fontEntry.GetAsDictionary(0));
            float fontSize = fontEntry.GetAsNumber(1).FloatValue();
            processor.GetGraphicsState().SetFont(font);
            processor.GetGraphicsState().SetFontSize(fontSize);
        }

        // The clone is made with the Font key passed as the list argument to Clone;
        // the font was already handled above.
        PdfDictionary stateWithoutFont = stateDictionary.Clone(JavaCollectionsUtil.SingletonList(PdfName.Font));
        PdfExtGState extGState = new PdfExtGState(stateWithoutFont);
        processor.GetGraphicsState().UpdateFromExtGState(extGState);
    }
}
/// <summary>Content-stream handler for the save-graphics-state operator (q).</summary>
/// <remarks>
/// Pushes a copy of the graphics state currently on top of the processor's stack.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class PushGraphicsStateOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState current = processor.gsStack.Peek();
        processor.gsStack.Push(new ParserGraphicsState(current));
    }
}
/// <summary>A handler that implements operator (cm).</summary>
/// <remarks>
/// Builds a matrix from the six numeric operands, mirrors those operands into the
/// <c>CTM_SAAN__a</c> ... <c>CTM_SAAN__f</c> members and passes the matrix to
/// <c>UpdateCtm</c> on the current graphics state. A <c>PdfException</c> whose inner exception is a
/// <c>NoninvertibleTransformException</c> is logged and swallowed; any other <c>PdfException</c>
/// is rethrown. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class ModifyCurrentTransformationMatrixOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float a = ((PdfNumber)operands[0]).FloatValue();
        float b = ((PdfNumber)operands[1]).FloatValue();
        float c = ((PdfNumber)operands[2]).FloatValue();
        float d = ((PdfNumber)operands[3]).FloatValue();
        float e = ((PdfNumber)operands[4]).FloatValue();
        float f = ((PdfNumber)operands[5]).FloatValue();
        Matrix matrix = new Matrix(a, b, c, d, e, f);
        // Operand layout:
        //   a b 0
        //   c d 0
        //   e f 1
        // NOTE(review): an earlier comment here mapped Matrix.I12 to c and Matrix.I21 to b, which
        // contradicts the layout above - verify against the Matrix class before relying on indices.

        // Mirror the operands of this cm call. The former "flush to 0" assignments were overwritten
        // immediately by these stores, so they have been dropped.
        // NOTE(review): these members hold the operands of the most recent cm only (not the
        // accumulated CTM) and are written even when UpdateCtm below fails - confirm that is intended.
        CTM_SAAN__a = a;
        CTM_SAAN__b = b;
        CTM_SAAN__c = c;
        CTM_SAAN__d = d;
        CTM_SAAN__e = e;
        CTM_SAAN__f = f;
        try
        {
            processor.GetGraphicsState().UpdateCtm(matrix);
        }
        catch (PdfException exception)
        {
            if (exception.InnerException is NoninvertibleTransformException)
            {
                // A non-invertible matrix is tolerated: log it and carry on with the content stream.
                ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
                logger.LogError(MessageFormatUtil.Format(DETA7.IO.Logs.IoLogMessageConstant.FAILED_TO_PROCESS_A_TRANSFORMATION_MATRIX));
            }
            else
            {
                throw;
            }
        }
    }
}
/// <summary>Gets a color based on a list of operands and Color space.</summary>
/// <remarks>
/// The color space object is dereferenced when it is an indirect reference. A name selects one of
/// DeviceGray, Pattern, DeviceRGB or DeviceCMYK; an array selects by its first element (CalGray,
/// CalRGB, Lab, ICCBased, Indexed, Separation, DeviceN or Pattern). When nothing matches, a warning
/// is logged and <c>null</c> is returned.
/// </remarks>
/// <param name="pdfColorSpace">color space the operands are interpreted in</param>
/// <param name="operands">operand list; numeric colorants are read through <c>GetColorants</c></param>
/// <param name="resources">resources used to resolve pattern names</param>
/// <returns>the parsed color, or <c>null</c> when the operands cannot be interpreted</returns>
public static Color GetColor(PdfColorSpace pdfColorSpace, IList<PdfObject> operands, PdfResources resources)
{
    PdfObject csObject = pdfColorSpace.GetPdfObject().IsIndirectReference()
        ? ((PdfIndirectReference)pdfColorSpace.GetPdfObject()).GetRefersTo()
        : pdfColorSpace.GetPdfObject();

    if (csObject.IsName())
    {
        if (PdfName.DeviceGray.Equals(csObject))
        {
            return new DeviceGray(GetColorants(operands)[0]);
        }
        if (PdfName.Pattern.Equals(csObject) && operands[0] is PdfName)
        {
            PdfPattern namedPattern = resources.GetPattern((PdfName)operands[0]);
            if (namedPattern != null)
            {
                return new PatternColor(namedPattern);
            }
        }
        if (PdfName.DeviceRGB.Equals(csObject))
        {
            float[] rgb = GetColorants(operands);
            return new DeviceRgb(rgb[0], rgb[1], rgb[2]);
        }
        if (PdfName.DeviceCMYK.Equals(csObject))
        {
            float[] cmyk = GetColorants(operands);
            return new DeviceCmyk(cmyk[0], cmyk[1], cmyk[2], cmyk[3]);
        }
    }
    else if (csObject.IsArray())
    {
        PdfArray csArray = (PdfArray)csObject;
        PdfName csType = csArray.GetAsName(0);
        if (PdfName.CalGray.Equals(csType))
        {
            return new CalGray((PdfCieBasedCs.CalGray)pdfColorSpace, GetColorants(operands)[0]);
        }
        if (PdfName.CalRGB.Equals(csType))
        {
            return new CalRgb((PdfCieBasedCs.CalRgb)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Lab.Equals(csType))
        {
            return new Lab((PdfCieBasedCs.Lab)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.ICCBased.Equals(csType))
        {
            return new IccBased((PdfCieBasedCs.IccBased)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Indexed.Equals(csType))
        {
            return new Indexed(pdfColorSpace, (int)GetColorants(operands)[0]);
        }
        if (PdfName.Separation.Equals(csType))
        {
            return new Separation((PdfSpecialCs.Separation)pdfColorSpace, GetColorants(operands)[0]);
        }
        if (PdfName.DeviceN.Equals(csType))
        {
            return new DeviceN((PdfSpecialCs.DeviceN)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Pattern.Equals(csType))
        {
            // The pattern name sits second from the end; removing it leaves the operands that
            // GetColorants reads for the underlying color space.
            IList<PdfObject> underlyingOperands = new List<PdfObject>(operands);
            PdfObject patternName = underlyingOperands.JRemoveAt(operands.Count - 2);
            PdfColorSpace underlyingCs = ((PdfSpecialCs.UncoloredTilingPattern)pdfColorSpace).GetUnderlyingColorSpace(
                );
            if (patternName is PdfName)
            {
                PdfPattern tilingCandidate = resources.GetPattern((PdfName)patternName);
                if (tilingCandidate is PdfPattern.Tiling && !((PdfPattern.Tiling)tilingCandidate).IsColored())
                {
                    return new PatternColor((PdfPattern.Tiling)tilingCandidate, underlyingCs, GetColorants(underlyingOperands));
                }
            }
        }
    }

    // Nothing matched: report the operands and the color space object, then give up.
    ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
    logger.LogWarning(MessageFormatUtil.Format(KernelLogMessageConstant.UNABLE_TO_PARSE_COLOR_WITHIN_COLORSPACE
        , JavaUtil.ArraysToString((Object[])operands.ToArray()), pdfColorSpace.GetPdfObject()));
    return null;
}
/// <summary>Gets a color based on a list of operands.</summary>
/// <remarks>
/// The first <paramref name="nOperands"/> operands are converted to floats before the count is
/// inspected, so a non-numeric operand fails even for an unsupported count.
/// </remarks>
/// <param name="nOperands">number of leading operands to read: 1 (gray), 3 (RGB) or 4 (CMYK)</param>
/// <param name="operands">operand list whose leading entries are numbers</param>
/// <returns>the device color for the given count, or <c>null</c> for any other count</returns>
public static Color GetColor(int nOperands, IList<PdfObject> operands)
{
    float[] components = new float[nOperands];
    for (int index = 0; index < components.Length; index++)
    {
        components[index] = ((PdfNumber)operands[index]).FloatValue();
    }

    if (nOperands == 1)
    {
        return new DeviceGray(components[0]);
    }
    if (nOperands == 3)
    {
        return new DeviceRgb(components[0], components[1], components[2]);
    }
    if (nOperands == 4)
    {
        return new DeviceCmyk(components[0], components[1], components[2], components[3]);
    }
    return null;
}
/// <summary>Converts every operand except the last one to a float.</summary>
/// <remarks>
/// NOTE(review): the last element is skipped - presumably it is the operator literal appended by
/// the caller; confirm against the code that builds the operand list.
/// </remarks>
/// <param name="operands">operand list; all entries but the last must be numbers</param>
/// <returns>array of <c>operands.Count - 1</c> float values</returns>
public static float[] GetColorants(IList<PdfObject> operands)
{
    int colorantCount = operands.Count - 1;
    float[] colorants = new float[colorantCount];
    int position = 0;
    while (position < colorantCount)
    {
        colorants[position] = ((PdfNumber)operands[position]).FloatValue();
        position++;
    }
    return colorants;
}
/// <summary>Content-stream handler for the restore-graphics-state operator (Q).</summary>
/// <remarks>
/// Pops the top graphics state and raises a <c>CLIP_PATH_CHANGED</c> event built from the state
/// that is current afterwards. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class PopGraphicsStateOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.gsStack.Pop();

        // Report the clipping path and CTM of the state that is now current.
        ParserGraphicsState restored = processor.GetGraphicsState();
        ClippingPathInfo clipInfo = new ClippingPathInfo(restored, restored.GetClippingPath(), restored.GetCtm());
        processor.EventOccurred(clipInfo, EventType.CLIP_PATH_CHANGED);
    }
}
/// <summary>Content-stream handler for the gray fill color operator (g).</summary>
/// <remarks>
/// Sets the fill color of the current graphics state from one numeric operand.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetGrayFillOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color gray = GetColor(1, operands);
        state.SetFillColor(gray);
    }
}
/// <summary>Content-stream handler for the gray stroke color operator (G).</summary>
/// <remarks>
/// Sets the stroke color of the current graphics state from one numeric operand.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetGrayStrokeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color gray = GetColor(1, operands);
        state.SetStrokeColor(gray);
    }
}
/// <summary>Content-stream handler for the RGB fill color operator (rg).</summary>
/// <remarks>
/// Sets the fill color of the current graphics state from three numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetRGBFillOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color rgb = GetColor(3, operands);
        state.SetFillColor(rgb);
    }
}
/// <summary>Content-stream handler for the RGB stroke color operator (RG).</summary>
/// <remarks>
/// Sets the stroke color of the current graphics state from three numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetRGBStrokeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color rgb = GetColor(3, operands);
        state.SetStrokeColor(rgb);
    }
}
/// <summary>Content-stream handler for the CMYK fill color operator (k).</summary>
/// <remarks>
/// Sets the fill color of the current graphics state from four numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetCMYKFillOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color cmyk = GetColor(4, operands);
        state.SetFillColor(cmyk);
    }
}
/// <summary>Content-stream handler for the CMYK stroke color operator (K).</summary>
/// <remarks>
/// Sets the stroke color of the current graphics state from four numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetCMYKStrokeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        Color cmyk = GetColor(4, operands);
        state.SetStrokeColor(cmyk);
    }
}
/// <summary>Content-stream handler that selects the color space used for filling.</summary>
/// <remarks>
/// Resolves the color space named by the first operand and resets the fill color of the current
/// graphics state to the color produced by <c>Color.MakeColor</c> for that space.
/// NOTE(review): the previous comment labelled this handler (CS); in ISO 32000-1 the non-stroking
/// (fill) color space operator is spelled <c>cs</c>. Registration is outside this chunk - confirm.
/// </remarks>
public class SetColorSpaceFillOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName colorSpaceName = (PdfName)operands[0];
        PdfColorSpace resolved = DetermineColorSpace(colorSpaceName, processor);
        processor.GetGraphicsState().SetFillColor(Color.MakeColor(resolved));
    }
    //\cond DO_NOT_DOCUMENT
    // Resolves a color space name: names contained in PdfColorSpace.DIRECT_COLOR_SPACES are built
    // directly from the name, every other name is looked up in the ColorSpace resource dictionary.
    // NOTE(review): the ColorSpace dictionary is not null-checked before the lookup.
    public static PdfColorSpace DetermineColorSpace(PdfName colorSpace, PdfCanvasProcessor processor)
    {
        if (PdfColorSpace.DIRECT_COLOR_SPACES.Contains(colorSpace))
        {
            return PdfColorSpace.MakeColorSpace(colorSpace);
        }
        PdfResources pdfResources = processor.GetResources();
        PdfDictionary colorSpaceResources = pdfResources.GetPdfObject().GetAsDictionary(PdfName.ColorSpace);
        return PdfColorSpace.MakeColorSpace(colorSpaceResources.Get(colorSpace));
    }
    //\endcond
}
/// <summary>Content-stream handler that selects the color space used for stroking.</summary>
/// <remarks>
/// Resolves the color space named by the first operand through
/// <c>SetColorSpaceFillOperator.DetermineColorSpace</c> and resets the stroke color of the current
/// graphics state to the color produced by <c>Color.MakeColor</c> for that space.
/// NOTE(review): the previous comment labelled this handler (cs); in ISO 32000-1 the stroking
/// color space operator is spelled <c>CS</c>. Registration is outside this chunk - confirm.
/// </remarks>
public class SetColorSpaceStrokeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName colorSpaceName = (PdfName)operands[0];
        PdfColorSpace resolved = PdfCanvasProcessor.SetColorSpaceFillOperator.DetermineColorSpace(colorSpaceName, processor);
        processor.GetGraphicsState().SetStrokeColor(Color.MakeColor(resolved));
    }
}
/// <summary>Content-stream handler for the fill color operators (sc / scn).</summary>
/// <remarks>
/// Interprets the operands in the color space of the current fill color and stores the result as
/// the new fill color. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetColorFillOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        // The color space comes from whatever fill color is currently set.
        Color parsed = GetColor(
            processor.GetGraphicsState().GetFillColor().GetColorSpace(),
            operands,
            processor.GetResources());
        state.SetFillColor(parsed);
    }
}
/// <summary>Content-stream handler for the stroke color operators (SC / SCN).</summary>
/// <remarks>
/// Interprets the operands in the color space of the current stroke color and stores the result as
/// the new stroke color. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetColorStrokeOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        // The color space comes from whatever stroke color is currently set.
        Color parsed = GetColor(
            processor.GetGraphicsState().GetStrokeColor().GetColorSpace(),
            operands,
            processor.GetResources());
        state.SetStrokeColor(parsed);
    }
}
/// <summary>Content-stream handler for the begin-text operator (BT).</summary>
/// <remarks>
/// Installs one default-constructed matrix as both text matrix and text line matrix, then calls
/// <c>BeginText</c> on the processor. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class BeginTextOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        Matrix freshMatrix = new Matrix();
        processor.textMatrix = freshMatrix;
        processor.textLineMatrix = processor.textMatrix;
        processor.BeginText();
    }
}
/// <summary>Content-stream handler for the end-text operator (ET).</summary>
/// <remarks>
/// Clears the text matrix and the text line matrix, then calls <c>EndText</c> on the processor.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class EndTextOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Both matrices are null once the text object has ended.
        processor.textLineMatrix = null;
        processor.textMatrix = null;
        processor.EndText();
    }
}
/// <summary>Content-stream handler for the begin-marked-content operator (BMC).</summary>
/// <remarks>
/// Starts a marked-content sequence with the tag from the first operand and no property
/// dictionary. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class BeginMarkedContentOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName tag = (PdfName)operands[0];
        processor.BeginMarkedContent(tag, null);
    }
}
/// <summary>A handler that implements operator (BDC).</summary>
/// <remarks>
/// Starts a marked-content sequence with the tag from the first operand and the property
/// dictionary described by the second operand. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class BeginMarkedContentDictionaryOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfObject properties = operands[1];
        PdfName tag = (PdfName)operands[0];
        processor.BeginMarkedContent(tag, GetPropertiesDictionary(properties, processor.GetResources()));
    }
    //\cond DO_NOT_DOCUMENT
    // Resolves the property operand of BDC. An inline dictionary is returned as is; otherwise the
    // operand is cast to a name and looked up in the Properties resource dictionary. A warning is
    // logged and null returned when the Properties dictionary or the named entry is missing.
    public virtual PdfDictionary GetPropertiesDictionary(PdfObject operand1, PdfResources resources)
    {
        if (operand1.IsDictionary())
        {
            return (PdfDictionary)operand1;
        }
        PdfName dictionaryName = ((PdfName)operand1);
        PdfDictionary properties = resources.GetResource(PdfName.Properties);
        if (null == properties)
        {
            ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
            logger.LogWarning(MessageFormatUtil.Format(DETA7.IO.Logs.IoLogMessageConstant.PDF_REFERS_TO_NOT_EXISTING_PROPERTY_DICTIONARY
                , PdfName.Properties));
            return null;
        }
        PdfDictionary propertiesDictionary = properties.GetAsDictionary(dictionaryName);
        if (null == propertiesDictionary)
        {
            ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
            logger.LogWarning(MessageFormatUtil.Format(DETA7.IO.Logs.IoLogMessageConstant.PDF_REFERS_TO_NOT_EXISTING_PROPERTY_DICTIONARY
                , dictionaryName));
        }
        // Return the entry already fetched above instead of looking it up a second time;
        // it is null exactly when the warning was logged.
        return propertiesDictionary;
    }
    //\endcond
}
/// <summary>Content-stream handler for the end-marked-content operator (EMC).</summary>
/// <remarks>
/// Takes no operands; it only calls <c>EndMarkedContent</c> on the processor.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class EndMarkedContentOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // The operand list is not read.
        processor.EndMarkedContent();
    }
}
/// <summary>Content-stream handler for the paint-XObject operator (Do).</summary>
/// <remarks>
/// Passes the resource name from the first operand to <c>DisplayXObject</c> on the processor.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class DoOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName xObjectName = (PdfName)operands[0];
        processor.DisplayXObject(xObjectName);
    }
}
/// <summary>Content-stream handler for the end-inline-image operator (EI).</summary>
/// <remarks>
/// For more information see Table 51 ISO-32000-1.
/// The BI and ID operators are parsed along with this operator.
/// This is not a usual operator: it has a single operand, a PdfStream object that
/// encapsulates the inline image dictionary and bytes.
/// </remarks>
public class EndImageOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfStream inlineImage = (PdfStream)operands[0];
        // No resource name is passed for an inline image; the last argument is passed as true here
        // (the XObject image handler passes false).
        processor.DisplayImage(processor.markedContentStack, inlineImage, null, true);
    }
}
/// <summary>Content-stream handler for the line-width operator (w).</summary>
/// <remarks>
/// Stores the float value of the first operand as the line width of the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetLineWidthOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber widthOperand = (PdfNumber)operands[0];
        processor.GetGraphicsState().SetLineWidth(widthOperand.FloatValue());
    }
}
/// <summary>Content-stream handler for the line-cap operator (J).</summary>
/// <remarks>
/// Stores the integer value of the first operand as the line cap style of the current graphics
/// state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetLineCapOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber capOperand = (PdfNumber)operands[0];
        processor.GetGraphicsState().SetLineCapStyle(capOperand.IntValue());
    }
}
/// <summary>Content-stream handler for the line-join operator (j).</summary>
/// <remarks>
/// Stores the integer value of the first operand as the line join style of the current graphics
/// state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetLineJoinOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber joinOperand = (PdfNumber)operands[0];
        processor.GetGraphicsState().SetLineJoinStyle(joinOperand.IntValue());
    }
}
/// <summary>Content-stream handler for the miter-limit operator (M).</summary>
/// <remarks>
/// Stores the float value of the first operand as the miter limit of the current graphics state.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetMiterLimitOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber limitOperand = (PdfNumber)operands[0];
        processor.GetGraphicsState().SetMiterLimit(limitOperand.FloatValue());
    }
}
/// <summary>Content-stream handler for the dash-pattern operator (d).</summary>
/// <remarks>
/// Wraps the first two operands in a new PDF array and stores it as the dash pattern of the
/// current graphics state. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class SetLineDashPatternOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        ParserGraphicsState state = processor.GetGraphicsState();
        PdfArray dashPattern = new PdfArray(JavaUtil.ArraysAsList(operands[0], operands[1]));
        state.SetDashPattern(dashPattern);
    }
}
/// <summary>An XObject subtype handler for FORM.</summary>
/// <remarks>
/// Processes the content of a form XObject between a pushed and a popped graphics state, applying
/// the form's Matrix entry to the CTM when present.
/// </remarks>
public class FormXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Processes the content stream of a form XObject.</summary>
    /// <param name="processor">processor whose graphics state and content parser are used</param>
    /// <param name="canvasTagHierarchy">not read by this handler</param>
    /// <param name="xObjectStream">the form XObject stream</param>
    /// <param name="xObjectName">not read by this handler</param>
    public virtual void HandleXObject(PdfCanvasProcessor processor, Stack<CanvasTag> canvasTagHierarchy, PdfStream
        xObjectStream, PdfName xObjectName)
    {
        // A form without its own Resources entry falls back to the processor's current resources.
        PdfDictionary ownResources = xObjectStream.GetAsDictionary(PdfName.Resources);
        PdfResources resources = ownResources == null
            ? processor.GetResources()
            : new PdfResources(ownResources);

        // The bytes are read before the graphics state is pushed, so a failure while reading
        // cannot leave an unbalanced stack.
        // NOTE(review): a failure inside ProcessContent below still skips the matching pop.
        byte[] contentBytes = xObjectStream.GetBytes();
        PdfArray matrixEntry = xObjectStream.GetAsArray(PdfName.Matrix);

        new PdfCanvasProcessor.PushGraphicsStateOperator().Invoke(processor, null, null);
        if (matrixEntry != null)
        {
            float[] m = new float[6];
            for (int i = 0; i < m.Length; i++)
            {
                m[i] = matrixEntry.GetAsNumber(i).FloatValue();
            }
            processor.GetGraphicsState().UpdateCtm(new Matrix(m[0], m[1], m[2], m[3], m[4], m[5]));
        }
        processor.ProcessContent(contentBytes, resources);
        new PdfCanvasProcessor.PopGraphicsStateOperator().Invoke(processor, null, null);
    }
}
/// <summary>An XObject subtype handler for IMAGE.</summary>
public class ImageXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Forwards the image XObject to <c>DisplayImage</c> on the processor.</summary>
    /// <param name="processor">processor that displays the image</param>
    /// <param name="canvasTagHierarchy">tag hierarchy passed through to <c>DisplayImage</c></param>
    /// <param name="xObjectStream">the image XObject stream</param>
    /// <param name="resourceName">resource name of the image</param>
    public virtual void HandleXObject(PdfCanvasProcessor processor, Stack<CanvasTag> canvasTagHierarchy, PdfStream
        xObjectStream, PdfName resourceName)
    {
        // The last argument is false here; the inline-image (EI) handler passes true.
        processor.DisplayImage(canvasTagHierarchy, xObjectStream, resourceName, false);
    }
}
/// <summary>An XObject subtype handler that does nothing.</summary>
public class IgnoreXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Intentionally empty: XObjects routed to this handler are ignored.</summary>
    public virtual void HandleXObject(PdfCanvasProcessor processor, Stack<CanvasTag> canvasTagHierarchy, PdfStream
        xObjectStream, PdfName xObjectName)
    {
        // ignore XObject subtype
    }
}
/// <summary>Content-stream handler for the move-to operator (m).</summary>
/// <remarks>
/// Calls <c>MoveTo</c> on the processor's current path with the two numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class MoveToOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float targetX = ((PdfNumber)operands[0]).FloatValue();
        float targetY = ((PdfNumber)operands[1]).FloatValue();
        processor.currentPath.MoveTo(targetX, targetY);
    }
}
/// <summary>Content-stream handler for the line-to operator (l).</summary>
/// <remarks>
/// Calls <c>LineTo</c> on the processor's current path with the two numeric operands.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class LineToOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float endX = ((PdfNumber)operands[0]).FloatValue();
        float endY = ((PdfNumber)operands[1]).FloatValue();
        processor.currentPath.LineTo(endX, endY);
    }
}
/// <summary>Content-stream handler for the curve-to operator (c).</summary>
/// <remarks>
/// Calls the six-argument <c>CurveTo</c> on the processor's current path with the six numeric
/// operands in order. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class CurveOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Order: x1 y1 x2 y2 x3 y3.
        float[] p = new float[6];
        for (int i = 0; i < p.Length; i++)
        {
            p[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.CurveTo(p[0], p[1], p[2], p[3], p[4], p[5]);
    }
}
/// <summary>Content-stream handler for the curve operator (v).</summary>
/// <remarks>
/// Calls the four-argument <c>CurveTo</c> on the processor's current path with the operands
/// x2 y2 x3 y3. For more information see Table 51 ISO-32000-1.
/// </remarks>
public class CurveFirstPointDuplicatedOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float secondControlX = ((PdfNumber)operands[0]).FloatValue();
        float secondControlY = ((PdfNumber)operands[1]).FloatValue();
        float endX = ((PdfNumber)operands[2]).FloatValue();
        float endY = ((PdfNumber)operands[3]).FloatValue();
        processor.currentPath.CurveTo(secondControlX, secondControlY, endX, endY);
    }
}
/// <summary>Content-stream handler for the curve operator (y).</summary>
/// <remarks>
/// Calls <c>CurveFromTo</c> on the processor's current path with the operands x1 y1 x3 y3.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class CurveFourhPointDuplicatedOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float firstControlX = ((PdfNumber)operands[0]).FloatValue();
        float firstControlY = ((PdfNumber)operands[1]).FloatValue();
        float endX = ((PdfNumber)operands[2]).FloatValue();
        float endY = ((PdfNumber)operands[3]).FloatValue();
        processor.currentPath.CurveFromTo(firstControlX, firstControlY, endX, endY);
    }
}
/// <summary>Content-stream handler for the close-subpath operator (h).</summary>
/// <remarks>
/// Takes no operands; it only calls <c>CloseSubpath</c> on the processor's current path.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class CloseSubpathOperator : IContentOperator
{
    /// <inheritdoc/>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // The operand list is not read.
        processor.currentPath.CloseSubpath();
    }
}
/// <summary>Content-stream handler for the rectangle operator (re).</summary>
/// <remarks>
/// Reads four numeric operands (x, y, width, height) and passes them, in order, to
/// <c>Rectangle</c> of the processor's current path.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class RectangleOperator : IContentOperator
{
    /// <summary><inheritDoc/></summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float cornerX = ((PdfNumber)operands[0]).FloatValue();
        float cornerY = ((PdfNumber)operands[1]).FloatValue();
        float width = ((PdfNumber)operands[2]).FloatValue();
        float height = ((PdfNumber)operands[3]).FloatValue();

        processor.currentPath.Rectangle(cornerX, cornerY, width, height);
    }
}
/// <summary>Content-stream handler for the path-painting operators (S, s, f, F, f*, B, B*, b, b*).</summary>
/// <remarks>
/// One instance is configured per operator variant: which paint operation to request, which
/// filling rule to pass along, and whether the subpath is closed first.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class PaintPathOperator : IContentOperator
{
    /// <summary>Paint operation handed to <c>PdfCanvasProcessor.PaintPath</c>.</summary>
    public int operation;

    /// <summary>Filling rule handed to <c>PdfCanvasProcessor.PaintPath</c>.</summary>
    public int rule;

    /// <summary>When <see langword="true"/>, the current subpath is closed before painting.</summary>
    public bool close;

    /// <summary>Constructs a PaintPathOperator object.</summary>
    /// <param name="operation">
    /// One of the possible combinations of
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.STROKE"/>
    /// and
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.FILL"/>
    /// values or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.NO_OP"/>
    /// </param>
    /// <param name="rule">
    /// Either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>.
    /// In case it isn't applicable pass any value.
    /// </param>
    /// <param name="close">Indicates whether the path should be closed or not.</param>
    public PaintPathOperator(int operation, int rule, bool close)
    {
        this.close = close;
        this.rule = rule;
        this.operation = operation;
    }

    /// <summary><inheritDoc/></summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Closing variants finish the open subpath before the paint request is issued.
        if (this.close)
        {
            processor.currentPath.CloseSubpath();
        }

        processor.PaintPath(this.operation, this.rule);
    }
}
/// <summary>Content-stream handler for the clipping operators (W, W*).</summary>
/// <remarks>
/// Does not touch the path itself; it only flags the processor so that clipping is pending
/// and records which rule to use.
/// For more information see Table 51 ISO-32000-1.
/// </remarks>
public class ClipPathOperator : IContentOperator
{
    /// <summary>Clipping rule copied onto the processor when the operator is invoked.</summary>
    public int rule;

    /// <summary>Creates a handler bound to the given clipping rule.</summary>
    /// <param name="rule">the clipping rule to store on the processor</param>
    public ClipPathOperator(int rule)
    {
        this.rule = rule;
    }

    /// <summary><inheritDoc/></summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.isClip = true;
        processor.clippingRule = this.rule;
    }
}
}
}
using System.Collections.Generic;
using DETA7.Commons.Utils;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Canvas;
namespace DETA7.Kernel.Pdf.Canvas.Parser.Data
{
/// <summary>Contains information relating to painting the current path.</summary>
/// <remarks>
/// Carries the path, the requested paint operation, the filling rule, the clipping flag and rule,
/// and a snapshot of the marked-content tag hierarchy. Values that come from the graphics state
/// are read through the state held by <see cref="AbstractRenderInfo"/>; those getters call
/// <c>CheckGraphicsState()</c> first and therefore throw once the state has been released.
/// </remarks>
public class PathRenderInfo : AbstractRenderInfo
{
    /// <summary>End the path object without filling or stroking it.</summary>
    /// <remarks>
    /// A path-painting no-op, used primarily for the side effect of changing the current clipping path.
    /// </remarks>
    public const int NO_OP = 0;

    /// <summary>Value specifying stroke operation to perform on the current path.</summary>
    public const int STROKE = 1;

    /// <summary>Value specifying fill operation to perform on the current path.</summary>
    /// <remarks>A fill uses either the nonzero winding or the even-odd rule.</remarks>
    public const int FILL = 2;

    /// <summary>The path to be rendered.</summary>
    public Path path;

    /// <summary><see cref="NO_OP"/> or a combination of <see cref="STROKE"/> and <see cref="FILL"/>.</summary>
    public int operation;

    /// <summary>Filling rule supplied for the paint operation.</summary>
    public int rule;

    /// <summary><see langword="true"/> when this path modifies the clipping path.</summary>
    public bool isClip;

    /// <summary>Rule supplied for clipping.</summary>
    public int clippingRule;

    /// <summary>
    /// Hierarchy of nested canvas tags wrapping this path, from the innermost tag to the outermost.
    /// </summary>
    public IList<CanvasTag> canvasTagHierarchy;

    /// <summary>
    /// Creates the new
    /// <see cref="PathRenderInfo"/>
    /// instance.
    /// </summary>
    /// <param name="canvasTagHierarchy">the canvas tag hierarchy; copied and wrapped as an unmodifiable list</param>
    /// <param name="gs">the graphics state</param>
    /// <param name="path">the path to be rendered</param>
    /// <param name="operation">
    /// one of the possible combinations of
    /// <see cref="STROKE"/>
    /// and
    /// <see cref="FILL"/>
    /// values or
    /// <see cref="NO_OP"/>
    /// </param>
    /// <param name="rule">
    /// either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>
    /// </param>
    /// <param name="isClip">
    /// <see langword="true"/>
    /// indicates that current path modifies the clipping path
    /// </param>
    /// <param name="clipRule">
    /// either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>
    /// </param>
    public PathRenderInfo(Stack<CanvasTag> canvasTagHierarchy, CanvasGraphicsState gs, Path path, int operation
        , int rule, bool isClip, int clipRule)
        : base(gs)
    {
        // Snapshot the stack so later pushes/pops on the caller's side do not affect this event.
        List<CanvasTag> tagSnapshot = new List<CanvasTag>(canvasTagHierarchy);
        this.canvasTagHierarchy = JavaCollectionsUtil.UnmodifiableList<CanvasTag>(tagSnapshot);

        this.path = path;
        this.operation = operation;
        this.rule = rule;
        this.isClip = isClip;
        this.clippingRule = clipRule;
    }

    /// <summary>
    /// Creates an instance that does not modify the clipping path and uses
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// for both the filling rule and the clipping rule.
    /// </summary>
    /// <remarks>
    /// If the operation is
    /// <see cref="NO_OP"/>
    /// then the rule is ignored.
    /// <para />
    /// See
    /// <see cref="PathRenderInfo(Stack{CanvasTag}, CanvasGraphicsState, Path, int, int, bool, int)"/>.
    /// </remarks>
    /// <param name="canvasTagHierarchy">the canvas tag hierarchy</param>
    /// <param name="gs">the graphics state</param>
    /// <param name="path">the path to be rendered</param>
    /// <param name="operation">
    /// one of the possible combinations of
    /// <see cref="STROKE"/>
    /// and
    /// <see cref="FILL"/>
    /// values or
    /// <see cref="NO_OP"/>
    /// </param>
    public PathRenderInfo(Stack<CanvasTag> canvasTagHierarchy, CanvasGraphicsState gs, Path path, int operation)
        : this(
            canvasTagHierarchy,
            gs,
            path,
            operation,
            PdfCanvasConstants.FillingRule.NONZERO_WINDING,
            false,
            PdfCanvasConstants.FillingRule.NONZERO_WINDING)
    {
    }

    /// <summary>Gets the <see cref="DETA7.Kernel.Geom.Path"/> to be rendered.</summary>
    /// <returns>the path to be rendered</returns>
    public virtual Path GetPath()
    {
        return this.path;
    }

    /// <summary>
    /// Gets the <c>int</c> value which is either <see cref="NO_OP"/> or one of the possible
    /// combinations of <see cref="STROKE"/> and <see cref="FILL"/>.
    /// </summary>
    /// <returns>the operation value</returns>
    public virtual int GetOperation()
    {
        return this.operation;
    }

    /// <summary>
    /// Gets either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>.
    /// </summary>
    /// <returns>the rule value</returns>
    public virtual int GetRule()
    {
        return this.rule;
    }

    /// <summary>Gets the clipping path flag.</summary>
    /// <returns><see langword="true"/> indicates that current path modifies the clipping path</returns>
    public virtual bool IsPathModifiesClippingPath()
    {
        return this.isClip;
    }

    /// <summary>
    /// Gets either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>.
    /// </summary>
    /// <returns>the clipping rule value</returns>
    public virtual int GetClippingRule()
    {
        return this.clippingRule;
    }

    /// <summary>Gets the current transformation matrix.</summary>
    /// <returns>the current transformation <see cref="DETA7.Kernel.Geom.Matrix">matrix</see></returns>
    public virtual Matrix GetCtm()
    {
        CheckGraphicsState();
        Matrix ctm = gs.GetCtm();
        return ctm;
    }

    /// <summary>Gets the path's line width.</summary>
    /// <returns>the path's line width</returns>
    public virtual float GetLineWidth()
    {
        CheckGraphicsState();
        float lineWidth = gs.GetLineWidth();
        return lineWidth;
    }

    /// <summary>Gets the line cap style.</summary>
    /// <remarks>See <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.LineCapStyle"/>.</remarks>
    /// <returns>the line cap style value</returns>
    public virtual int GetLineCapStyle()
    {
        CheckGraphicsState();
        int capStyle = gs.GetLineCapStyle();
        return capStyle;
    }

    /// <summary>Gets the line join style.</summary>
    /// <remarks>See <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.LineJoinStyle"/>.</remarks>
    /// <returns>the line join style value</returns>
    public virtual int GetLineJoinStyle()
    {
        CheckGraphicsState();
        int joinStyle = gs.GetLineJoinStyle();
        return joinStyle;
    }

    /// <summary>Gets the miter limit.</summary>
    /// <returns>the miter limit</returns>
    public virtual float GetMiterLimit()
    {
        CheckGraphicsState();
        float miterLimit = gs.GetMiterLimit();
        return miterLimit;
    }

    /// <summary>Gets the path's dash pattern.</summary>
    /// <returns>the path's dash pattern as a <see cref="DETA7.Kernel.Pdf.PdfArray"/></returns>
    public virtual PdfArray GetLineDashPattern()
    {
        CheckGraphicsState();
        PdfArray dashPattern = gs.GetDashPattern();
        return dashPattern;
    }

    /// <summary>Gets the path's stroke color.</summary>
    /// <returns>the path's stroke <see cref="DETA7.Kernel.Colors.Color">color</see></returns>
    public virtual Color GetStrokeColor()
    {
        CheckGraphicsState();
        Color strokeColor = gs.GetStrokeColor();
        return strokeColor;
    }

    /// <summary>Gets the path's fill color.</summary>
    /// <returns>the path's fill <see cref="DETA7.Kernel.Colors.Color">color</see></returns>
    public virtual Color GetFillColor()
    {
        CheckGraphicsState();
        Color fillColor = gs.GetFillColor();
        return fillColor;
    }

    /// <summary>Gets hierarchy of the canvas tags that wraps this path.</summary>
    /// <returns>list of the wrapping canvas tags. The first tag is the innermost</returns>
    public virtual IList<CanvasTag> GetCanvasTagHierarchy()
    {
        return this.canvasTagHierarchy;
    }

    /// <summary>
    /// Gets the marked-content identifier associated with this
    /// <see cref="PathRenderInfo"/>
    /// instance.
    /// </summary>
    /// <returns>
    /// the MCID of the first tag in the hierarchy that has one, or -1 in case content is unmarked
    /// </returns>
    public virtual int GetMcid()
    {
        foreach (CanvasTag candidate in canvasTagHierarchy)
        {
            if (!candidate.HasMcid())
            {
                continue;
            }
            return candidate.GetMcid();
        }
        return -1;
    }

    /// <summary>
    /// Checks if this
    /// <see cref="PathRenderInfo"/>
    /// instance belongs to a marked content sequence with a given mcid, looking at every
    /// level of the tag hierarchy.
    /// </summary>
    /// <param name="mcid">a marked content id</param>
    /// <returns>
    /// <see langword="true"/> if this instance is marked with this id,
    /// <see langword="false"/> otherwise
    /// </returns>
    public virtual bool HasMcid(int mcid)
    {
        const bool topmostOnly = false;
        return HasMcid(mcid, topmostOnly);
    }

    /// <summary>
    /// Checks if this
    /// <see cref="PathRenderInfo"/>
    /// instance belongs to a marked content sequence with a given mcid.
    /// </summary>
    /// <param name="mcid">a marked content id</param>
    /// <param name="checkTheTopmostLevelOnly">
    /// when <see langword="true"/>, only the id reported by <see cref="GetMcid()"/> is compared;
    /// otherwise every tag in the hierarchy is examined
    /// </param>
    /// <returns>
    /// <see langword="true"/> if this instance is marked with this id,
    /// <see langword="false"/> otherwise
    /// </returns>
    public virtual bool HasMcid(int mcid, bool checkTheTopmostLevelOnly)
    {
        if (!checkTheTopmostLevelOnly)
        {
            foreach (CanvasTag candidate in canvasTagHierarchy)
            {
                if (candidate.HasMcid() && candidate.GetMcid() == mcid)
                {
                    return true;
                }
            }
            return false;
        }

        // Only this branch tolerates a missing hierarchy.
        if (canvasTagHierarchy == null)
        {
            return false;
        }

        int nearestMcid = GetMcid();
        return nearestMcid != -1 && nearestMcid == mcid;
    }
}
}
using System;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Tagging;
namespace DETA7.Kernel.Pdf.Canvas {
/// <summary>This class represents a single tag on a single piece of marked content.</summary>
/// <remarks>
/// This class represents a single tag on a single piece of marked content.
/// <para />
/// In Tagged PDF, a tag is the basic structure unit for marking content. The tag
/// structure and hierarchy is largely comparable to HTML. As in HTML, every tag
/// type has a name, defined here in the <c>role</c> attribute. The tagging
/// mechanism in Tagged PDF is extensible, so PDF creators can choose to create
/// custom tags.
/// </remarks>
public class CanvasTag
{
    /// <summary>The type of the tag.</summary>
    public PdfName role;

    /// <summary>The properties of the tag; stays <see langword="null"/> until a property is set or added.</summary>
    public PdfDictionary properties;

    /// <summary>
    /// Creates a tag that is referenced to the document's tag structure (i.e.
    /// logical structure).
    /// </summary>
    /// <param name="role">the type of tag</param>
    public CanvasTag(PdfName role)
    {
        this.role = role;
    }

    /// <summary>
    /// Creates a tag that is referenced to the document's tag structure (i.e.
    /// logical structure).
    /// </summary>
    /// <param name="role">the type of tag</param>
    /// <param name="mcid">marked content id which serves as a reference to the document's logical structure</param>
    public CanvasTag(PdfName role, int mcid)
    {
        this.role = role;
        // The id is stored as a regular property under the MCID key.
        AddProperty(PdfName.MCID, new PdfNumber(mcid));
    }

    /// <summary>
    /// Creates a tag that is referenced to the document's tag structure (i.e.
    /// logical structure).
    /// </summary>
    /// <param name="mcr">
    /// the
    /// <see cref="DETA7.Kernel.Pdf.Tagging.PdfMcr">Marked Content Reference</see>
    /// wrapper object
    /// </param>
    public CanvasTag(PdfMcr mcr)
        : this(mcr.GetRole(), mcr.GetMcid())
    {
    }

    /// <summary>Get the role of the tag.</summary>
    /// <returns>the role of the tag as a PdfName</returns>
    public virtual PdfName GetRole()
    {
        return role;
    }

    /// <summary>Get the marked content id of the tag.</summary>
    /// <returns>marked content id</returns>
    /// <exception cref="System.InvalidOperationException">
    /// thrown with the message "CanvasTag has no MCID" when there are no properties, when the
    /// properties hold no integer MCID entry, or when the stored value is -1
    /// </exception>
    public virtual int GetMcid()
    {
        // Read into a nullable so that an absent MCID entry is detected explicitly rather
        // than surfacing as a failed nullable-to-int conversion with an unrelated message.
        int? storedMcid = null;
        if (properties != null)
        {
            storedMcid = properties.GetAsInt(PdfName.MCID);
        }

        if (!storedMcid.HasValue || storedMcid.Value == -1)
        {
            throw new InvalidOperationException("CanvasTag has no MCID");
        }
        return storedMcid.Value;
    }

    /// <summary>Determine if an MCID is available</summary>
    /// <returns>true if the properties dictionary exists and contains the MCID key, false otherwise</returns>
    public virtual bool HasMcid()
    {
        return properties != null && properties.ContainsKey(PdfName.MCID);
    }

    /// <summary>
    /// Sets a dictionary of properties to the
    /// <see cref="CanvasTag">tag</see>
    /// 's properties.
    /// </summary>
    /// <remarks>All existing properties (if any) will be lost.</remarks>
    /// <param name="properties">a dictionary</param>
    /// <returns>
    /// current
    /// <see cref="CanvasTag"/>
    /// </returns>
    public virtual DETA7.Kernel.Pdf.Canvas.CanvasTag SetProperties(PdfDictionary properties)
    {
        this.properties = properties;
        return this;
    }

    /// <summary>
    /// Adds a single property to the
    /// <see cref="CanvasTag">tag</see>
    /// 's properties, creating the dictionary first if needed.
    /// </summary>
    /// <param name="name">a key</param>
    /// <param name="value">the value for the key</param>
    /// <returns>
    /// current
    /// <see cref="CanvasTag"/>
    /// </returns>
    public virtual DETA7.Kernel.Pdf.Canvas.CanvasTag AddProperty(PdfName name, PdfObject value)
    {
        EnsurePropertiesInit();
        properties.Put(name, value);
        return this;
    }

    /// <summary>
    /// Removes a single property from the
    /// <see cref="CanvasTag">tag</see>
    /// 's properties. Does nothing when no properties dictionary exists.
    /// </summary>
    /// <param name="name">the key of the key-value pair to be removed</param>
    /// <returns>
    /// current
    /// <see cref="CanvasTag"/>
    /// </returns>
    public virtual DETA7.Kernel.Pdf.Canvas.CanvasTag RemoveProperty(PdfName name)
    {
        if (properties != null)
        {
            properties.Remove(name);
        }
        return this;
    }

    /// <summary>
    /// Gets a property from the
    /// <see cref="CanvasTag">tag</see>
    /// 's properties dictionary.
    /// </summary>
    /// <param name="name">the key of the key-value pair to be retrieved</param>
    /// <returns>
    /// the value corresponding to the key, or <see langword="null"/> when no properties dictionary exists
    /// </returns>
    public virtual PdfObject GetProperty(PdfName name)
    {
        if (properties == null)
        {
            return null;
        }
        return properties.Get(name);
    }

    /// <summary>Get the properties of the tag.</summary>
    /// <returns>properties of the tag</returns>
    public virtual PdfDictionary GetProperties()
    {
        return properties;
    }

    /// <summary>Gets value of /ActualText property.</summary>
    /// <returns>
    /// actual text value or
    /// <see langword="null"/>
    /// if actual text is not defined
    /// </returns>
    public virtual String GetActualText()
    {
        return GetPropertyAsString(PdfName.ActualText);
    }

    /// <summary>Gets value of /E (expansion text) property.</summary>
    /// <returns>
    /// expansion text value or
    /// <see langword="null"/>
    /// if it is not defined
    /// </returns>
    public virtual String GetExpansionText()
    {
        return GetPropertyAsString(PdfName.E);
    }

    /// <summary>Reads a string-valued property and converts it to a Unicode string.</summary>
    /// <param name="name">the key of the property to read</param>
    /// <returns>
    /// the Unicode text of the property, or <see langword="null"/> when there are no properties
    /// or the dictionary yields no string for the key
    /// </returns>
    public String GetPropertyAsString(PdfName name)
    {
        PdfString text = null;
        if (properties != null)
        {
            text = properties.GetAsString(name);
        }

        String result = null;
        if (text != null)
        {
            result = text.ToUnicodeString();
        }
        return result;
    }

    /// <summary>Creates an empty properties dictionary if none exists yet.</summary>
    public void EnsurePropertiesInit()
    {
        if (properties == null)
        {
            properties = new PdfDictionary();
        }
    }
}
}
using System;
using DETA7.Kernel.Pdf.Canvas;
namespace DETA7.Kernel.Pdf.Canvas.Parser.Data
{
/// <summary>
/// Base event data that holds the graphics state associated with a render operation and
/// controls whether that state is kept (preserved) or dropped (released).
/// </summary>
public class AbstractRenderInfo : IEventData
{
    /// <summary>The graphics state; set to <see langword="null"/> once released.</summary>
    public CanvasGraphicsState gs;

    /// <summary><see langword="true"/> after <see cref="PreserveGraphicsState()"/> has been called.</summary>
    public bool graphicsStateIsPreserved;

    /// <summary>Stores the given graphics state reference.</summary>
    /// <param name="gs">the graphics state</param>
    public AbstractRenderInfo(CanvasGraphicsState gs)
    {
        this.gs = gs;
    }

    /// <summary>Gets the graphics state.</summary>
    /// <returns>
    /// the held instance when the state is preserved, otherwise a new
    /// <see cref="CanvasGraphicsState"/> constructed from it
    /// </returns>
    public virtual CanvasGraphicsState GetGraphicsState()
    {
        CheckGraphicsState();
        if (graphicsStateIsPreserved)
        {
            return gs;
        }
        return new CanvasGraphicsState(gs);
    }

    /// <summary>Tells whether the graphics state has been preserved.</summary>
    /// <returns>the preserved flag</returns>
    public virtual bool IsGraphicsStatePreserved()
    {
        return this.graphicsStateIsPreserved;
    }

    /// <summary>
    /// Marks the state as preserved and replaces the held reference with a copy, so that
    /// <see cref="ReleaseGraphicsState()"/> no longer drops it.
    /// </summary>
    public virtual void PreserveGraphicsState()
    {
        CheckGraphicsState();
        graphicsStateIsPreserved = true;
        CanvasGraphicsState ownCopy = new CanvasGraphicsState(gs);
        gs = ownCopy;
    }

    /// <summary>Drops the graphics state reference unless it has been preserved.</summary>
    public virtual void ReleaseGraphicsState()
    {
        if (graphicsStateIsPreserved)
        {
            return;
        }
        gs = null;
    }

    /// <summary>Checks that the graphics state has not been released.</summary>
    /// <exception cref="System.InvalidOperationException">when the graphics state is <see langword="null"/></exception>
    public virtual void CheckGraphicsState()
    {
        if (gs != null)
        {
            return;
        }
        throw new InvalidOperationException(DETA7.IO.Logs.IoLogMessageConstant.GRAPHICS_STATE_WAS_DELETED);
    }
}
}
using System;
using System.Collections.Generic;
using System.Text;
using DETA7.Commons.Utils;
using DETA7.IO.Font;
using DETA7.IO.Font.Otf;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Font;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Canvas;
namespace DETA7.Kernel.Pdf.Canvas.Parser.Data
{
/// <summary>
/// Provides information and calculations needed by render listeners
/// to display/evaluate text render operations.
/// </summary>
/// <remarks>
/// Provides information and calculations needed by render listeners
/// to display/evaluate text render operations.
/// <br /><br />
/// This is passed between the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.PdfCanvasProcessor"/>
/// and
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// objects as text rendering operations are
/// discovered
/// </remarks>
public class TextRenderInfo : AbstractRenderInfo
{
// The PDF string passed to the constructor (the string that should be displayed).
public PdfString @string;
// Decoded Unicode text; stays null until GetText() computes and caches it.
public String text = null;
// Product of the text matrix and the CTM, computed in the constructors; used to map
// text-space segments into user space.
public Matrix textToUserSpaceTransformMatrix;
// The text matrix at the time of the render operation (for per-character infos: the parent's
// matrix combined with the character's horizontal offset).
public Matrix textMatrix;
// Cached text-space width; NaN means it has not been computed yet (see GetUnscaledWidth()).
public float unscaledWidth = float.NaN;
/// <summary>Hierarchy of nested canvas tags for the text from the most inner (nearest to text) tag to the most outer.
/// </summary>
public IList<CanvasTag> canvasTagHierarchy;
// NOTE(review): starts as an empty array and is not referenced in the visible part of this
// class - purpose to be confirmed against the rest of the file.
public float[] SAAN_FLOAT_ARRAY = { };
/// <summary>Creates a new TextRenderInfo object for one text-showing operation.</summary>
/// <param name="str">the PDF string that should be displayed</param>
/// <param name="gs">the graphics state (note: at this time, this is not immutable, so don't cache it)</param>
/// <param name="textMatrix">the text matrix at the time of the render operation</param>
/// <param name="canvasTagHierarchy">the marked content tags sequence; copied and wrapped as an unmodifiable list</param>
public TextRenderInfo(PdfString str, CanvasGraphicsState gs, Matrix textMatrix, Stack<CanvasTag> canvasTagHierarchy)
    : base(gs)
{
    this.@string = str;

    // Combined transform used later to map text-space segments into user space.
    Matrix ctm = gs.GetCtm();
    this.textToUserSpaceTransformMatrix = textMatrix.Multiply(ctm);
    this.textMatrix = textMatrix;

    // Snapshot of the tag stack as it is at construction time.
    List<CanvasTag> tagSnapshot = new List<CanvasTag>(canvasTagHierarchy);
    this.canvasTagHierarchy = JavaCollectionsUtil.UnmodifiableList<CanvasTag>(tagSnapshot);
}
/// <summary>Used for creating sub-TextRenderInfos for each individual character.</summary>
/// <remarks>
/// The new instance shares the parent's graphics state and tag hierarchy; both of its matrices
/// are the parent's matrices pre-multiplied by a matrix built from the horizontal offset.
/// </remarks>
/// <param name="parent">the parent TextRenderInfo</param>
/// <param name="str">the content of a TextRenderInfo</param>
/// <param name="horizontalOffset">the unscaled horizontal offset of the character that this TextRenderInfo represents
/// </param>
public TextRenderInfo(DETA7.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo parent, PdfString str, float horizontalOffset)
    : base(parent.gs)
{
    Matrix shift = new Matrix(horizontalOffset, 0);

    this.@string = str;
    this.textToUserSpaceTransformMatrix = shift.Multiply(parent.textToUserSpaceTransformMatrix);
    this.textMatrix = shift.Multiply(parent.textMatrix);
    this.canvasTagHierarchy = parent.canvasTagHierarchy;
}
// Local addition: the stock TextRenderInfo exposes no CTM getter, so one is provided here.
/// <summary>Gets the current transformation matrix.</summary>
/// <returns>
/// the current transformation
/// <see cref="DETA7.Kernel.Geom.Matrix">matrix</see>
/// read from the graphics state
/// </returns>
public virtual Matrix SAAN_Adds_GetCtm()
{
    CheckGraphicsState();
    Matrix ctm = gs.GetCtm();
    return ctm;
}
/// <summary>Gets the text to be rendered according to canvas operators.</summary>
/// <remarks>
/// The PDF string is decoded with the current font on first use and the result is cached.
/// When the text lies within a /ReversedChars sequence the glyphs are emitted last-to-first.
/// </remarks>
/// <returns>the text to render</returns>
public virtual String GetText()
{
    CheckGraphicsState();
    if (text != null)
    {
        return text;
    }

    GlyphLine glyphs = gs.GetFont().DecodeIntoGlyphLine(@string);
    if (IsReversedChars())
    {
        StringBuilder reversed = new StringBuilder(glyphs.GetEnd() - glyphs.GetStart());
        for (int idx = glyphs.GetEnd() - 1; idx >= glyphs.GetStart(); idx--)
        {
            reversed.Append(glyphs.Get(idx).GetUnicodeChars());
        }
        text = reversed.ToString();
    }
    else
    {
        text = glyphs.ToUnicodeString(glyphs.GetStart(), glyphs.GetEnd());
    }
    return text;
}
/// <summary>Gets the PDF string this instance was created with.</summary>
/// <returns>original PDF string</returns>
public virtual PdfString GetPdfString()
{
    return this.@string;
}
/// <summary>Gets the text matrix stored at construction time.</summary>
/// <returns>text matrix.</returns>
public virtual Matrix GetTextMatrix()
{
    return this.textMatrix;
}
/// <summary>
/// Checks if this
/// <see cref="TextRenderInfo"/>
/// instance belongs to a marked content sequence
/// with a given mcid, looking at every level of the tag hierarchy.
/// </summary>
/// <param name="mcid">a marked content id</param>
/// <returns>
/// <see langword="true"/>
/// if this
/// <see cref="TextRenderInfo"/>
/// instance is marked with this id,
/// <see langword="false"/>
/// otherwise
/// </returns>
public virtual bool HasMcid(int mcid)
{
    const bool topmostOnly = false;
    return HasMcid(mcid, topmostOnly);
}
/// <summary>
/// Checks if this
/// <see cref="TextRenderInfo"/>
/// instance belongs to a marked content sequence
/// with a given mcid.
/// </summary>
/// <param name="mcid">a marked content id</param>
/// <param name="checkTheTopmostLevelOnly">
/// when <see langword="true"/>, only the id reported by <see cref="GetMcid()"/> is compared;
/// otherwise every tag in the hierarchy is examined
/// </param>
/// <returns>
/// <see langword="true"/>
/// if this
/// <see cref="TextRenderInfo"/>
/// instance is marked with this id,
/// <see langword="false"/>
/// otherwise
/// </returns>
public virtual bool HasMcid(int mcid, bool checkTheTopmostLevelOnly)
{
    if (!checkTheTopmostLevelOnly)
    {
        foreach (CanvasTag candidate in canvasTagHierarchy)
        {
            if (candidate.HasMcid() && candidate.GetMcid() == mcid)
            {
                return true;
            }
        }
        return false;
    }

    // Only this branch tolerates a missing hierarchy.
    if (canvasTagHierarchy == null)
    {
        return false;
    }

    int nearestMcid = GetMcid();
    return nearestMcid != -1 && nearestMcid == mcid;
}
/// <summary>
/// Gets the marked-content identifier associated with this
/// <see cref="TextRenderInfo"/>
/// instance
/// </summary>
/// <returns>
/// the MCID of the first tag in the hierarchy that has one, or -1 in case content is unmarked
/// </returns>
public virtual int GetMcid()
{
    foreach (CanvasTag candidate in canvasTagHierarchy)
    {
        if (!candidate.HasMcid())
        {
            continue;
        }
        return candidate.GetMcid();
    }
    return -1;
}
/// <summary>
/// Gets the baseline for the text (i.e. the line that the text 'sits' on).
/// This value includes the Rise of the draw operation - see
/// <see cref="GetRise()"/>
/// for the amount added by Rise
/// </summary>
/// <returns>the baseline line segment, transformed into user space</returns>
public virtual LineSegment GetBaseline()
{
    CheckGraphicsState();
    float riseOffset = 0 + gs.GetTextRise();
    LineSegment textSpaceBaseline = GetUnscaledBaselineWithOffset(riseOffset);
    return textSpaceBaseline.TransformBy(textToUserSpaceTransformMatrix);
}
/// <summary>Gets the baseline in text space, offset vertically by the text rise.</summary>
/// <returns>the baseline line segment before the text-to-user-space transform is applied</returns>
public virtual LineSegment GetUnscaledBaseline()
{
    CheckGraphicsState();
    float riseOffset = 0 + gs.GetTextRise();
    return GetUnscaledBaselineWithOffset(riseOffset);
}
/// <summary>Gets the ascent line for the text (i.e. the line that represents the topmost extent that a string of the current font could have).
/// </summary>
/// <remarks>
/// The offset is the first value returned by <c>GetAscentDescent()</c> plus the text rise - see
/// <see cref="GetRise()"/>
/// for the amount added by Rise.
/// </remarks>
/// <returns>a LineSegment instance, transformed into user space</returns>
public virtual LineSegment GetAscentLine()
{
    CheckGraphicsState();
    float ascentOffset = GetAscentDescent()[0] + gs.GetTextRise();
    LineSegment textSpaceLine = GetUnscaledBaselineWithOffset(ascentOffset);
    return textSpaceLine.TransformBy(textToUserSpaceTransformMatrix);
}
/// <summary>Gets the descent line for the text (i.e. the line that represents the bottom most extent that a string of the current font could have).
/// </summary>
/// <remarks>
/// The offset is the second value returned by <c>GetAscentDescent()</c> plus the text rise - see
/// <see cref="GetRise()"/>
/// for the amount added by Rise.
/// </remarks>
/// <returns>a LineSegment instance, transformed into user space</returns>
public virtual LineSegment GetDescentLine()
{
    CheckGraphicsState();
    float descentOffset = GetAscentDescent()[1] + gs.GetTextRise();
    LineSegment textSpaceLine = GetUnscaledBaselineWithOffset(descentOffset);
    return textSpaceLine.TransformBy(textToUserSpaceTransformMatrix);
}
/// <summary>Gets the font recorded in the graphics state.</summary>
/// <returns>the font</returns>
public virtual PdfFont GetFont()
{
    CheckGraphicsState();
    PdfFont currentFont = gs.GetFont();
    return currentFont;
}
/// <summary>The rise represents how far above the nominal baseline the text should be rendered.</summary>
/// <remarks>
/// The
/// <see cref="GetBaseline()"/>
/// ,
/// <see cref="GetAscentLine()"/>
/// and
/// <see cref="GetDescentLine()"/>
/// methods already include Rise.
/// This method is exposed to allow listeners to determine if an explicit rise was involved in the computation of the baseline (this might be useful, for example, for identifying superscript rendering)
/// </remarks>
/// <returns>The Rise for the text draw operation, in user space units (Ts value, scaled to user space)</returns>
public virtual float GetRise()
{
    CheckGraphicsState();
    float textSpaceRise = gs.GetTextRise();
    // A zero rise (the common case) needs no conversion.
    return textSpaceRise == 0 ? 0 : ConvertHeightFromTextSpaceToUserSpace(textSpaceRise);
}
/// <summary>Provides detail useful if a listener needs access to the position of each individual glyph in the text render operation
/// </summary>
/// <returns>
/// A list of
/// <see cref="TextRenderInfo"/>
/// objects that represent each glyph used in the draw operation. The net effect is as if there was a separate Tj operation for each character in the rendered string
/// </returns>
public virtual IList<DETA7.Kernel.Pdf.Canvas.Parser.Data.TextRenderInfo> GetCharacterRenderInfos()
{
    CheckGraphicsState();
    List<TextRenderInfo> perGlyphInfos = new List<TextRenderInfo>(@string.GetValue().Length);

    // Each piece is placed at the accumulated advance of the pieces before it.
    float runningOffset = 0;
    foreach (PdfString piece in SplitString(@string))
    {
        float[] metrics = GetWidthAndWordSpacing(piece);
        perGlyphInfos.Add(new TextRenderInfo(this, piece, runningOffset));
        runningOffset += (metrics[0] * gs.GetFontSize() + gs.GetCharSpacing() + metrics[1]) *
            (gs.GetHorizontalScaling() / 100f);
    }

    // Compute each sub-info's width now so the value is cached on it.
    foreach (TextRenderInfo glyphInfo in perGlyphInfos)
    {
        glyphInfo.GetUnscaledWidth();
    }
    return perGlyphInfos;
}
/// <summary>Converts the font's text-space space width into user space.</summary>
/// <returns>The width, in user space units, of a single space character in the current font</returns>
public virtual float GetSingleSpaceWidth()
{
    float textSpaceWidth = GetUnscaledFontSpaceWidth();
    return ConvertWidthFromTextSpaceToUserSpace(textSpaceWidth);
}
/// <summary>Gets the text rendering mode recorded in the graphics state.</summary>
/// <returns>
/// the text render mode that should be used for the text. From the
/// PDF specification, this means:
/// <list type="bullet">
/// <item><description>0 = Fill text
/// </description></item>
/// <item><description>1 = Stroke text
/// </description></item>
/// <item><description>2 = Fill, then stroke text
/// </description></item>
/// <item><description>3 = Invisible
/// </description></item>
/// <item><description>4 = Fill text and add to path for clipping
/// </description></item>
/// <item><description>5 = Stroke text and add to path for clipping
/// </description></item>
/// <item><description>6 = Fill, then stroke text and add to path for clipping
/// </description></item>
/// <item><description>7 = Add text to path for clipping
/// </description></item>
/// </list>
/// </returns>
public virtual int GetTextRenderMode()
{
    CheckGraphicsState();
    int renderMode = gs.GetTextRenderingMode();
    return renderMode;
}
/// <summary>Gets the fill color recorded in the graphics state.</summary>
/// <returns>the current fill color.</returns>
public virtual Color GetFillColor()
{
    CheckGraphicsState();
    Color fill = gs.GetFillColor();
    return fill;
}
/// <summary>Gets the stroke color recorded in the graphics state.</summary>
/// <returns>the current stroke color.</returns>
public virtual Color GetStrokeColor()
{
    CheckGraphicsState();
    Color stroke = gs.GetStrokeColor();
    return stroke;
}
/// <summary>Gets the font size recorded in the graphics state.</summary>
/// <returns>the font size</returns>
public virtual float GetFontSize()
{
    CheckGraphicsState();
    float fontSize = gs.GetFontSize();
    return fontSize;
}
/// <summary>Gets the horizontal scaling recorded in the graphics state.</summary>
/// <returns>the horizontal scaling value (other methods in this class divide it by 100 before use)</returns>
public virtual float GetHorizontalScaling()
{
    CheckGraphicsState();
    float horizontalScaling = gs.GetHorizontalScaling();
    return horizontalScaling;
}
/// <summary>Gets the character spacing recorded in the graphics state.</summary>
/// <returns>the character spacing</returns>
public virtual float GetCharSpacing()
{
    CheckGraphicsState();
    float charSpacing = gs.GetCharSpacing();
    return charSpacing;
}
/// <summary>Gets the word spacing recorded in the graphics state.</summary>
/// <returns>the word spacing</returns>
public virtual float GetWordSpacing()
{
    CheckGraphicsState();
    float wordSpacing = gs.GetWordSpacing();
    return wordSpacing;
}
/// <summary>Gets the leading recorded in the graphics state.</summary>
/// <returns>the leading</returns>
public virtual float GetLeading()
{
    CheckGraphicsState();
    float leading = gs.GetLeading();
    return leading;
}
/// <summary>Gets /ActualText tag entry value if this text chunk is marked content.</summary>
/// <remarks>Tags are examined from the innermost outwards; the first non-null value wins.</remarks>
/// <returns>/ActualText value or <c>null</c> if none found</returns>
public virtual String GetActualText()
{
    foreach (CanvasTag tag in canvasTagHierarchy)
    {
        String candidate = tag.GetActualText();
        if (candidate != null)
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>Gets /E tag (expansion text) entry value if this text chunk is marked content.</summary>
/// <remarks>Tags are examined from the innermost outwards; the first non-null value wins.</remarks>
/// <returns>/E value or <c>null</c> if none found</returns>
public virtual String GetExpansionText()
{
    foreach (CanvasTag tag in canvasTagHierarchy)
    {
        String candidate = tag.GetExpansionText();
        if (candidate != null)
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// Determines if the text represented by this
/// <see cref="TextRenderInfo"/>
/// instance is written in a text showing operator
/// wrapped by /ReversedChars marked content sequence
/// </summary>
/// <returns><c>true</c> if this text block lies within /ReversedChars block, <c>false</c> otherwise</returns>
public virtual bool IsReversedChars()
{
    foreach (CanvasTag tag in canvasTagHierarchy)
    {
        // Null entries are skipped rather than dereferenced.
        if (tag == null)
        {
            continue;
        }
        if (PdfName.ReversedChars.Equals(tag.GetRole()))
        {
            return true;
        }
    }
    return false;
}
/// <summary>Gets hierarchy of the canvas tags that wraps given text.</summary>
/// <returns>list of the wrapping canvas tags. The first tag is the innermost (nearest to the text).</returns>
public virtual IList<CanvasTag> GetCanvasTagHierarchy()
{
    return this.canvasTagHierarchy;
}
/// <summary>Gets the text-space width of the string, computing and caching it on first use.</summary>
/// <returns>the unscaled (i.e. in Text space) width of the text</returns>
public virtual float GetUnscaledWidth()
{
    // NaN marks "not computed yet".
    if (!float.IsNaN(unscaledWidth))
    {
        return unscaledWidth;
    }
    unscaledWidth = GetPdfStringWidth(@string, false);
    return unscaledWidth;
}
/// <summary>Builds the baseline of the text in text space, shifted vertically by <paramref name="yOffset"/>.</summary>
/// <remarks>
/// The unscaled width includes the character spacing after the last character and, when the
/// decoded text ends with a space, the word spacing as well. Those trailing amounts matter
/// for tracking relative text coordinates but are not part of the baseline, so they are
/// subtracted (scaled by horizontal scaling / 100) before the segment is built.
/// </remarks>
/// <param name="yOffset">the y coordinate used for both end points of the segment</param>
/// <returns>a segment from (0, yOffset) to (corrected width, yOffset)</returns>
public LineSegment GetUnscaledBaselineWithOffset(float yOffset)
{
    CheckGraphicsState();
    String decodedText = @string.ToUnicodeString();
    bool endsWithSpace = decodedText.Length > 0 && decodedText[decodedText.Length - 1] == ' ';
    float baselineWidth = GetUnscaledWidth()
        - (gs.GetCharSpacing() + (endsWithSpace ? gs.GetWordSpacing() : 0)) * (gs.GetHorizontalScaling() / 100f);
    Vector baselineStart = new Vector(0, yOffset, 1);
    Vector baselineEnd = new Vector(baselineWidth, yOffset, 1);
    return new LineSegment(baselineStart, baselineEnd);
}
/// <summary>Converts a horizontal extent from text space to user space.</summary>
/// <remarks>
/// A segment from the origin to (width, 0) is transformed by the text-to-user-space
/// matrix and the length of the transformed segment is returned.
/// </remarks>
/// <param name="width">the width, in text space</param>
/// <returns>the width in user space</returns>
public float ConvertWidthFromTextSpaceToUserSpace(float width)
{
    Vector origin = new Vector(0, 0, 1);
    Vector widthEnd = new Vector(width, 0, 1);
    return new LineSegment(origin, widthEnd)
        .TransformBy(textToUserSpaceTransformMatrix)
        .GetLength();
}
/// <summary>Converts a vertical extent from text space to user space.</summary>
/// <remarks>
/// A segment from the origin to (0, height) is transformed by the text-to-user-space
/// matrix and the length of the transformed segment is returned.
/// </remarks>
/// <param name="height">the height, in text space</param>
/// <returns>the height in user space</returns>
public float ConvertHeightFromTextSpaceToUserSpace(float height)
{
    Vector origin = new Vector(0, 0, 1);
    Vector heightEnd = new Vector(0, height, 1);
    return new LineSegment(origin, heightEnd)
        .TransformBy(textToUserSpaceTransformMatrix)
        .GetLength();
}
/// <summary>Calculates the width of a space character in text space units.</summary>
/// <remarks>
/// When the font reports a zero width for the space character, the average width of
/// the font program is used instead. Font size, character spacing, word spacing and
/// horizontal scaling (as a percentage) are then applied.
/// </remarks>
/// <returns>the width of a single space character in text space units</returns>
public float GetUnscaledFontSpaceWidth()
{
    CheckGraphicsState();
    PdfFont font = gs.GetFont();
    int spaceWidth = font.GetWidth(' ');
    if (spaceWidth == 0)
    {
        // The font has no usable space glyph width; fall back to its average width
        spaceWidth = font.GetFontProgram().GetAvgWidth();
    }
    float spaceWidthInGlyphSpace = FontProgram.ConvertTextSpaceToGlyphSpace(spaceWidth);
    return (spaceWidthInGlyphSpace * gs.GetFontSize() + gs.GetCharSpacing() + gs.GetWordSpacing()) * gs.GetHorizontalScaling() / 100F;
}
/// <summary>Gets the width of a PDF string in text space units</summary>
/// <remarks>
/// A multi-character string is split with <c>SplitString</c> and the widths of the
/// single-character pieces are summed. A single-character string is measured directly:
/// (glyph width * font size + character spacing + word spacing) * horizontal scaling / 100,
/// evaluated in double precision and narrowed to float at the end.
/// </remarks>
/// <param name="string">the string that needs measuring</param>
/// <param name="singleCharString">
/// <c>true</c> when <paramref name="string"/> is already a single character and must be
/// measured directly; <c>false</c> to split it first
/// </param>
/// <returns>the width of a String in text space units</returns>
public float GetPdfStringWidth(PdfString @string, bool singleCharString)
{
    CheckGraphicsState();
    if (!singleCharString)
    {
        float totalWidth = 0;
        foreach (PdfString singleChar in SplitString(@string))
        {
            totalWidth += GetPdfStringWidth(singleChar, true);
        }
        return totalWidth;
    }
    float[] widthAndWordSpacing = GetWidthAndWordSpacing(@string);
    double glyphWidth = widthAndWordSpacing[0];
    double wordSpacing = widthAndWordSpacing[1];
    return (float)((glyphWidth * (double)gs.GetFontSize() + (double)gs.GetCharSpacing() + wordSpacing)
        * (double)gs.GetHorizontalScaling() / 100f);
}
/// <summary>Calculates width and word spacing of a single character PDF string.</summary>
/// <remarks>
/// IMPORTANT: Shall ONLY be used for a single character pdf strings.
/// Word spacing is reported only when the string value is exactly one space character;
/// for any other value the second item is 0.
/// </remarks>
/// <param name="string">a character to calculate width.</param>
/// <returns>array of 2 items: first item is a character width, second item is a calculated word spacing.</returns>
public float[] GetWidthAndWordSpacing(PdfString @string)
{
    CheckGraphicsState();
    float glyphWidth = FontProgram.ConvertTextSpaceToGlyphSpace(gs.GetFont().GetContentWidth(@string));
    float wordSpacing = 0;
    if (" ".Equals(@string.GetValue()))
    {
        wordSpacing = gs.GetWordSpacing();
    }
    return new float[] { glyphWidth, wordSpacing };
}
/// <summary>Converts a single character string to char code.</summary>
/// <remarks>
/// The string is encoded as UTF-16BE and the resulting bytes are folded, most significant
/// byte first, into one integer. An <see cref="ArgumentException"/> raised while doing so
/// is swallowed and reported as 0.
/// </remarks>
/// <param name="string">single character string to convert to.</param>
/// <returns>char code, or 0 for an empty string or when the conversion fails.</returns>
public int GetCharCode(String @string)
{
    try
    {
        int charCode = 0;
        foreach (byte octet in @string.GetBytes("UTF-16BE"))
        {
            // Shift what has been read so far one byte up and append the next byte
            charCode = (charCode << 8) + (octet & 0xff);
        }
        return charCode;
    }
    catch (ArgumentException)
    {
        return 0;
    }
}
/// <summary>Split PDF string into array of single character PDF strings.</summary>
/// <remarks>
/// For a <c>PdfType0Font</c> the string is decoded into a glyph line and every glyph is
/// converted back to its own byte sequence, because one glyph may take 1 to 4 bytes.
/// For any other font each character of the string value becomes one PDF string that
/// keeps the encoding of the source string.
/// </remarks>
/// <param name="string">PDF string to be split.</param>
/// <returns>split PDF string.</returns>
public PdfString[] SplitString(PdfString @string)
{
    CheckGraphicsState();
    PdfFont font = gs.GetFont();
    if (!(font is PdfType0Font))
    {
        // One byte corresponds to one character
        String value = @string.GetValue();
        PdfString[] singleChars = new PdfString[value.Length];
        for (int pos = 0; pos < singleChars.Length; pos++)
        {
            singleChars[pos] = new PdfString(value.JSubstring(pos, pos + 1), @string.GetEncoding());
        }
        return singleChars;
    }
    // Number of bytes forming one glyph can be arbitrary from [1; 4] range
    IList<PdfString> glyphStrings = new List<PdfString>();
    GlyphLine glyphLine = font.DecodeIntoGlyphLine(@string);
    for (int idx = glyphLine.GetStart(); idx < glyphLine.GetEnd(); idx++)
    {
        glyphStrings.Add(new PdfString(font.ConvertToBytes(glyphLine.Get(idx))));
    }
    return glyphStrings.ToArray(new PdfString[glyphStrings.Count]);
}
/// <summary>Gets the ascent and descent of the current font, scaled by the current font size.</summary>
/// <remarks>
/// The typographic ascender and descender are read from the font metrics. A positive
/// descender is treated as a font bug and negated. Both values are divided by the
/// ascender-to-descender span when that span is below 700, otherwise by
/// <c>FontProgram.UNITS_NORMALIZATION</c>, and then multiplied by the font size.
/// </remarks>
/// <returns>array of 2 items: scaled ascent first, scaled descent second</returns>
public float[] GetAscentDescent()
{
    CheckGraphicsState();
    var metrics = gs.GetFont().GetFontProgram().GetFontMetrics();
    float ascent = metrics.GetTypoAscender();
    float descent = metrics.GetTypoDescender();
    // If descent is positive, we consider it a bug and fix it
    if (descent > 0)
    {
        descent = -descent;
    }
    float span = ascent - descent;
    float scale = span < 700 ? span : FontProgram.UNITS_NORMALIZATION;
    float fontSize = gs.GetFontSize();
    return new float[] { ascent / scale * fontSize, descent / scale * fontSize };
}
}
}
using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using DETA7.Commons;
using DETA7.Commons.Utils;
using DETA7.IO.Font;
using DETA7.IO.Source;
using DETA7.Kernel.Colors;
using DETA7.Kernel.Exceptions;
using DETA7.Kernel.Font;
using DETA7.Kernel.Geom;
using DETA7.Kernel.Logs;
using DETA7.Kernel.Pdf;
using DETA7.Kernel.Pdf.Canvas;
using DETA7.Kernel.Pdf.Canvas.Parser.Data;
using DETA7.Kernel.Pdf.Canvas.Parser.Listener;
using DETA7.Kernel.Pdf.Canvas.Parser.Util;
using DETA7.Kernel.Pdf.Colorspace;
using DETA7.Kernel.Pdf.Extgstate;
using System.Text;
// Author note (saan): this class reads PDF content streams and parses them.
namespace DETA7.Kernel.Pdf.Canvas.Parser
{
/// <summary>Processor for a PDF content stream.</summary>
public class PdfCanvasProcessor
{
// ---- Tracing state added by saan. Every member in this group is static, so it is shared by all processor instances. ----
/// <summary>Running number of trace events; the constructors increment it before appending a trace line.</summary>
public static double PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS = 0;
/// <summary>Page counter printed as the second column of every trace line.</summary>
/// <remarks>NOTE(review): nothing in the visible code updates this value - confirm where it is maintained.</remarks>
public static double PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING = 0;
/// <summary>Collects one line per traced step while PDF content is being read.</summary>
public static StringBuilder PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES
= new StringBuilder();
/// <summary>Switch that is tested before trace lines are appended. The single-argument constructor sets it to <c>true</c>.</summary>
public static bool PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG = true;
// Author's sketch of the transformation matrix built from the six values a..f:
// a b 0
// c d 0
// e f 1
// Author's mapping of matrix cells to those values:
// matrix.Get(Matrix.I11) IS a
// matrix.Get(Matrix.I12) IS c
// matrix.Get(Matrix.I13) IS e
// matrix.Get(Matrix.I21) IS b
// matrix.Get(Matrix.I22) IS d
// matrix.Get(Matrix.I23) IS f
// matrix.Get(Matrix.I31) IS 0
// matrix.Get(Matrix.I32) IS 0
// matrix.Get(Matrix.I33) IS 1
// NOTE(review): the sketch puts b in row 1 / column 2, while the mapping assigns I12 to c and I21 to b.
// One of the two is transposed - confirm against the Matrix class before relying on either.
// Static copies of the six matrix values. NOTE(review): presumably filled from the operands of the
// matrix-modifying operator (the original notes read "((PdfNumber)operands[i]).FloatValue()") - confirm where they are assigned.
public static float CTM_SAAN__a = 0;// intended source: ((PdfNumber)operands[0]).FloatValue()
public static float CTM_SAAN__b = 0;// intended source: ((PdfNumber)operands[1]).FloatValue()
public static float CTM_SAAN__c = 0;// intended source: ((PdfNumber)operands[2]).FloatValue()
public static float CTM_SAAN__d = 0;// intended source: ((PdfNumber)operands[3]).FloatValue()
public static float CTM_SAAN__e = 0;// intended source: ((PdfNumber)operands[4]).FloatValue()
public static float CTM_SAAN__f = 0;// intended source: ((PdfNumber)operands[5]).FloatValue()
/// <summary>Second report builder; the single-argument constructor replaces it with a fresh instance.</summary>
public static StringBuilder PUBLIC_STATIC_STRINGBUILDER___SAAN___kernel_pdf_canvas_parser_PdfCanvasProcessor = new StringBuilder();
/// <summary>Key under which the catch-all content operator is registered.</summary>
public const String DEFAULT_OPERATOR = "DefaultOperator";
/// <summary>Listener that will be notified of render events</summary>
public IEventListener eventListener;
/// <summary>
/// Cache supported events in case the user's
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener.GetSupportedEvents()"/>
/// method is not very efficient
/// </summary>
public ICollection<EventType> supportedEvents;
/// <summary>The path currently held by the processor; <c>Reset()</c> replaces it with an empty one.</summary>
public Path currentPath = new Path();
/// <summary>
/// Indicates whether the current clipping path should be modified by
/// intersecting it with the current path.
/// </summary>
public bool isClip;
/// <summary>
/// Specifies the filling rule which should be applied while calculating
/// new clipping path.
/// </summary>
public int clippingRule;
/// <summary>A map with all supported operators (PDF syntax).</summary>
public IDictionary<String, IContentOperator> operators;
/// <summary>Resources for the content stream.</summary>
/// <remarks>
/// Resources for the content stream.
/// Current resources are always at the top of the stack.
/// The stack is needed in case some "inner" content stream with its own resources
/// is encountered (like Form XObject).
/// </remarks>
public IList<PdfResources> resourcesStack;
/// <summary>Stack keeping track of the graphics state.</summary>
public Stack<ParserGraphicsState> gsStack = new Stack<ParserGraphicsState>();
/// <summary>Current text matrix; <c>Reset()</c> sets it to <c>null</c>.</summary>
public Matrix textMatrix;
/// <summary>Current text line matrix; <c>Reset()</c> sets it to <c>null</c>.</summary>
public Matrix textLineMatrix;
/// <summary>A map with all supported XObject handlers</summary>
public IDictionary<PdfName, IXObjectDoHandler> xobjectDoHandlers;
/// <summary>The font cache</summary>
public IDictionary<int, WeakReference> cachedFonts = new Dictionary<int, WeakReference>();
/// <summary>A stack containing marked content info.</summary>
public Stack<CanvasTag> markedContentStack = new Stack<CanvasTag>();
/// <summary>A memory limits handler.</summary>
public MemoryLimitsAwareHandler memoryLimitsHandler = null;
/// <summary>Page size in bytes.</summary>
public long pageSize = 0;
// ---- Static page information and element counters (saan). The single-argument constructor sets all of them to 0. ----
// "CHOUNTER" is a misspelling of "COUNTER"; the names are public, so they are left unchanged.
// The overall counters are declared as long, the page-wise counters as double.
public static int PUBLIC_STATIC_INT_SAAN_CURRENT_PAGE_NUMBER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_WIDTH___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_HEIGHT___IN_CANVAS_PROCESSOR = 0;
// Text chunks
public static long PUBLIC_STATIC_LONG_OVERALL_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Line segments
public static long PUBLIC_STATIC_LONG_OVERALL_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Curves, "Mm" group - NOTE(review): the meaning of "Mm" is not visible here; confirm with the author
public static long PUBLIC_STATIC_LONG_OVERALL_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Curves, "Ss" group - NOTE(review): the meaning of "Ss" is not visible here; confirm with the author
public static long PUBLIC_STATIC_LONG_OVERALL_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
// Images
public static long PUBLIC_STATIC_LONG_OVERALL_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
public static double PUBLIC_STATIC_DOUBLE_PAGEWISE_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
/// <summary>
/// Creates a new PDF Content Stream Processor that will send its output to the
/// designated render listener.
/// </summary>
/// <remarks>
/// Besides wiring up the listener, the operators and the XObject handlers, this constructor
/// drives saan's tracing: it switches the sequence log on, appends one trace line on entry
/// and one after initialisation, replaces the secondary report builder and sets the static
/// page information and element counters to 0. All of that state is static, so constructing
/// a processor changes it for every other instance as well.
/// Each trace line has the event counter and the page counter right-aligned in 60-character
/// columns, followed by the message.
/// </remarks>
/// <param name="eventListener">
/// the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// that will receive rendering notifications; it is dereferenced while the first trace
/// line is built, so it must not be null
/// </param>
public PdfCanvasProcessor(IEventListener eventListener)
{
    // The event counter is bumped once unconditionally and once more for the entry trace line
    PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
    PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG = true;
    if (PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
    {
        PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
        // The log builder is public and may have been set to null from outside
        if (PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
        {
            PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES = new StringBuilder();
        }
        String entryEventColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60);
        String entryPageColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60);
        PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES.AppendLine(
            entryEventColumn + " " + entryPageColumn + " "
            + "entered here public PdfCanvasProcessor(IEventListener eventListener) eventListener = " + eventListener.GetType().ToString());
    }

    // The secondary report builder always starts empty for a new processor
    PUBLIC_STATIC_STRINGBUILDER___SAAN___kernel_pdf_canvas_parser_PdfCanvasProcessor = new StringBuilder();

    // Page information
    PUBLIC_STATIC_INT_SAAN_CURRENT_PAGE_NUMBER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_WIDTH___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_double_SAAN_CURRENT_PAGE_HEIGHT___IN_CANVAS_PROCESSOR = 0;
    // Overall element counters
    PUBLIC_STATIC_LONG_OVERALL_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_LONG_OVERALL_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    // Page-wise element counters
    PUBLIC_STATIC_DOUBLE_PAGEWISE_TEXT_CHUNK_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_LINESEGMENT_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Mm_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_CURVE_Ss_CHOUNTER___IN_CANVAS_PROCESSOR = 0;
    PUBLIC_STATIC_DOUBLE_PAGEWISE_IMAGE_CHOUNTER___IN_CANVAS_PROCESSOR = 0;

    // The supported events must be cached before PopulateOperators(), which consults them
    this.eventListener = eventListener;
    supportedEvents = eventListener.GetSupportedEvents();
    operators = new Dictionary<String, IContentOperator>();
    PopulateOperators();
    xobjectDoHandlers = new Dictionary<PdfName, IXObjectDoHandler>();
    PopulateXObjectDoHandlers();
    Reset();

    // The flag is read again here because it is public and static
    if (PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
    {
        PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
        if (PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
        {
            PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES = new StringBuilder();
        }
        String readyEventColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60);
        String readyPageColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60);
        PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES.AppendLine(
            readyEventColumn + " " + readyPageColumn + " "
            + "AFTER this.eventListener = eventListener; AFTER PopulateXObjectDoHandlers(); AFTER this.supportedEvents = eventListener.GetSupportedEvents(); AFTER PopulateOperators(); AFTER this.supportedEvents = eventListener.GetSupportedEvents();");
    }
}
/// <summary>
/// Creates a new PDF Content Stream Processor that will send its output to the
/// designated render listener.
/// </summary>
/// <remarks>
/// Also allows registration of custom IContentOperators that can influence
/// how (and whether or not) the PDF instructions will be parsed.
/// The single-argument constructor runs first. Every entry of the map is then registered
/// through <c>RegisterContentOperator</c>, so an entry replaces a built-in operator that
/// uses the same key. While the sequence log is switched on, one trace line is appended
/// per registered entry.
/// </remarks>
/// <param name="eventListener">
/// the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// that will receive rendering notifications
/// </param>
/// <param name="additionalContentOperators">
/// an optional map of custom
/// <see cref="IContentOperator"/>
/// s for rendering instructions; <c>null</c> is treated as "no additional operators"
/// </param>
public PdfCanvasProcessor(IEventListener eventListener, IDictionary<String, IContentOperator> additionalContentOperators )
: this(eventListener)
{
    if (additionalContentOperators == null)
    {
        // The map is documented as optional: without it there is nothing to register
        return;
    }
    foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators)
    {
        RegisterContentOperator(entry.Key, entry.Value);
        if (PUBLIC_STATIC_BOOL_DO_YOU_NEED_THE_SEQUENCE_OF_PROCESS_LOG)
        {
            PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS++;
            // An existing log is kept as it is; a builder is created only when the field is null
            if (PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES == null)
            {
                PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES = new StringBuilder();
            }
            // PadLeft right-aligns in a 60-character column and cannot throw for long values
            String eventColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_EVENTS.ToString().PadLeft(60);
            String pageColumn = PUBLIC_STATIC_DOUBLE___COUNTER_FOR_CANVAS_PROCESSOR_CURRENT_PAGES_NUMBER_UNDER_PROCESSING.ToString().PadLeft(60);
            PUBLIC_STATIC_STRINGBUILDER_FOR_SAAN_WANTS_TO_LOG_EVERY_SEQUENCE_OF_ACTIVITY_WHILE_READING_PDF_FILES.AppendLine(
                eventColumn + " " + pageColumn + " "
                + "INSIDE public PdfCanvasProcessor(IEventListener eventListener, IDictionary<String, IContentOperator> additionalContentOperators ) INSIDE foreach (KeyValuePair<String, IContentOperator> entry in additionalContentOperators) entry.Key = " + entry.Key + " entry.Value = "+ entry.Value);
        }
    }
}
/// <summary>Registers a Do handler that will be called when Do for the provided XObject subtype is encountered during content processing.
/// </summary>
/// <remarks>
/// The handler is stored in the XObject handler map under the given subtype and whatever
/// the map's <c>Put</c> call returns is passed back to the caller.
/// <br />
/// If you register a handler, it is a very good idea to pass the call on to the existing registered handler (returned by this call), otherwise you
/// may inadvertently change the public behavior of the processor.
/// </remarks>
/// <param name="xobjectSubType">the XObject subtype this handler will process, or PdfName.DEFAULT for a catch-all handler
/// </param>
/// <param name="handler">the handler that will receive notification when the Do operator for the specified subtype is encountered
/// </param>
/// <returns>the existing registered handler, if any</returns>
public virtual IXObjectDoHandler RegisterXObjectDoHandler(PdfName xobjectSubType, IXObjectDoHandler handler)
{
    IXObjectDoHandler previousHandler = xobjectDoHandlers.Put(xobjectSubType, handler);
    return previousHandler;
}
/// <summary>Registers a content operator that will be called when the specified operator string is encountered during content processing.
/// </summary>
/// <remarks>
/// The operator is stored in the operator map under the given string and whatever
/// the map's <c>Put</c> call returns is passed back to the caller.
/// <br />
/// If you register an operator, it is a very good idea to pass the call on to the existing registered operator (returned by this call), otherwise you
/// may inadvertently change the public behavior of the processor.
/// </remarks>
/// <param name="operatorString">the operator id, or DEFAULT_OPERATOR for a catch-all operator</param>
/// <param name="operator">the operator that will receive notification when the operator is encountered</param>
/// <returns>the existing registered operator, if any</returns>
public virtual IContentOperator RegisterContentOperator(String operatorString, IContentOperator @operator)
{
    IContentOperator previousOperator = operators.Put(operatorString, @operator);
    return previousOperator;
}
/// <summary>
/// Gets a
/// <see cref="System.Collections.Generic.ICollection{T}"/>
/// containing all the registered operators strings.
/// </summary>
/// <remarks>
/// The keys are copied into a new list on every call, so changing the returned
/// collection does not change the registered operators.
/// </remarks>
/// <returns>
/// a new
/// <see cref="System.Collections.Generic.ICollection{T}"/>
/// containing all the registered operators strings.
/// </returns>
public virtual ICollection<String> GetRegisteredOperatorStrings() => new List<String>(operators.Keys);
/// <summary>Resets the graphics state stack, matrices and resources.</summary>
/// <remarks>
/// After the call the graphics state stack holds exactly one new
/// <see cref="ParserGraphicsState"/>, both text matrices are <c>null</c>, the current path
/// and the resources stack are new empty instances, the clip flag is cleared, and the
/// memory limits handler and the accumulated page size are cleared.
/// </remarks>
public virtual void Reset()
{
    // Graphics state: drop everything and start from one default state
    gsStack.Clear();
    gsStack.Push(new ParserGraphicsState());

    // Text state
    textMatrix = null;
    textLineMatrix = null;

    // Path construction and clipping
    currentPath = new Path();
    isClip = false;

    // Resources and memory accounting
    resourcesStack = new List<PdfResources>();
    memoryLimitsHandler = null;
    pageSize = 0;
}
/// <summary>
/// Gets the current
/// <see cref="ParserGraphicsState"/>
/// , i.e. the one on top of the graphics state stack, without removing it.
/// </summary>
/// <returns>
/// the current
/// <see cref="ParserGraphicsState"/>
/// </returns>
public virtual ParserGraphicsState GetGraphicsState() => gsStack.Peek();
/// <summary>Processes PDF syntax.</summary>
/// <remarks>
/// The resources are pushed onto the resources stack for the duration of the call. The
/// content bytes are parsed one instruction at a time; the last operand of each parsed
/// instruction is the operator literal, which is dispatched through <c>InvokeOperator</c>.
/// When a memory limits handler is set, the length of the content is added to the running
/// page size and checked against the limit before parsing starts.
/// <b>Note:</b> If you re-use a given
/// <see cref="PdfCanvasProcessor"/>
/// , you must call
/// <see cref="Reset()"/>
/// </remarks>
/// <param name="contentBytes">the bytes of a content stream</param>
/// <param name="resources">the resources of the content stream. Must not be null.</param>
/// <exception cref="PdfException">
/// when <paramref name="resources"/> is null, or when an I/O error occurs while parsing the content
/// </exception>
public virtual void ProcessContent(byte[] contentBytes, PdfResources resources)
{
    if (resources == null)
    {
        throw new PdfException(KernelExceptionMessageConstant.RESOURCES_CANNOT_BE_NULL);
    }
    if (memoryLimitsHandler != null)
    {
        pageSize += (long)contentBytes.Length;
        memoryLimitsHandler.CheckIfPageSizeExceedsTheLimit(this.pageSize);
    }
    resourcesStack.Add(resources);
    var contentSource = new RandomAccessSourceFactory().CreateSource(contentBytes);
    PdfTokenizer tokenizer = new PdfTokenizer(new RandomAccessFileOrArray(contentSource));
    PdfCanvasParser parser = new PdfCanvasParser(tokenizer, resources);
    IList<PdfObject> operands = new List<PdfObject>();
    try
    {
        // An empty operand list means the end of the content was reached
        while (parser.Parse(operands).Count > 0)
        {
            int operatorIndex = operands.Count - 1;
            InvokeOperator((PdfLiteral)operands[operatorIndex], operands);
        }
    }
    catch (System.IO.IOException e)
    {
        throw new PdfException(KernelExceptionMessageConstant.CANNOT_PARSE_CONTENT_STREAM, e);
    }
    // Pop the resources pushed above
    resourcesStack.JRemoveAt(resourcesStack.Count - 1);
}
/// <summary>Processes PDF syntax.</summary>
/// <remarks>
/// Takes the memory limits handler from the page's document, initialises the clipping
/// path for the page, reports that initial clipping path as a
/// <c>CLIP_PATH_CHANGED</c> event and then processes the page content with the page resources.
/// <strong>Note:</strong> If you re-use a given
/// <see cref="PdfCanvasProcessor"/>
/// , you must call
/// <see cref="Reset()"/>
/// </remarks>
/// <param name="page">the page to process</param>
public virtual void ProcessPageContent(PdfPage page)
{
    memoryLimitsHandler = page.GetDocument().GetMemoryLimitsAwareHandler();
    InitClippingPath(page);
    ParserGraphicsState currentState = GetGraphicsState();
    ClippingPathInfo initialClip = new ClippingPathInfo(currentState, currentState.GetClippingPath(), currentState.GetCtm());
    EventOccurred(initialClip, EventType.CLIP_PATH_CHANGED);
    byte[] pageContent = page.GetContentBytes();
    PdfResources pageResources = page.GetResources();
    ProcessContent(pageContent, pageResources);
}
/// <summary>
/// Accessor method for the
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Listener.IEventListener"/>
/// object maintained in this class.
/// </summary>
/// <remarks>
/// Necessary for implementing custom ContentOperator implementations.
/// </remarks>
/// <returns>the listener that was passed to the constructor</returns>
public virtual IEventListener GetEventListener() => eventListener;
/// <summary>Loads all the supported graphics and text state operators in a map.</summary>
public virtual void PopulateOperators()
{
    // A null filter means every event type is wanted, so every operator
    // group below gets registered. Otherwise each group is registered only
    // when at least one of the events it feeds is in the filter.
    bool acceptsAll = supportedEvents == null;
    bool wantsText = acceptsAll || supportedEvents.Contains(EventType.RENDER_TEXT);
    bool wantsPath = acceptsAll || supportedEvents.Contains(EventType.RENDER_PATH);
    bool wantsClip = acceptsAll || supportedEvents.Contains(EventType.CLIP_PATH_CHANGED);
    bool wantsImage = acceptsAll || supportedEvents.Contains(EventType.RENDER_IMAGE);
    bool wantsTextBlock = wantsText
        || supportedEvents.Contains(EventType.BEGIN_TEXT)
        || supportedEvents.Contains(EventType.END_TEXT);

    // Operators that are registered regardless of the event filter.
    RegisterContentOperator(DEFAULT_OPERATOR, new PdfCanvasProcessor.IgnoreOperator());
    RegisterContentOperator("q", new PdfCanvasProcessor.PushGraphicsStateOperator());
    RegisterContentOperator("Q", new PdfCanvasProcessor.PopGraphicsStateOperator());
    RegisterContentOperator("cm", new PdfCanvasProcessor.ModifyCurrentTransformationMatrixOperator());
    RegisterContentOperator("Do", new PdfCanvasProcessor.DoOperator());
    RegisterContentOperator("BMC", new PdfCanvasProcessor.BeginMarkedContentOperator());
    RegisterContentOperator("BDC", new PdfCanvasProcessor.BeginMarkedContentDictionaryOperator());
    RegisterContentOperator("EMC", new PdfCanvasProcessor.EndMarkedContentOperator());

    // Colour and extended-graphics-state operators.
    if (wantsText || wantsPath || wantsClip)
    {
        RegisterContentOperator("g", new PdfCanvasProcessor.SetGrayFillOperator());
        RegisterContentOperator("G", new PdfCanvasProcessor.SetGrayStrokeOperator());
        RegisterContentOperator("rg", new PdfCanvasProcessor.SetRGBFillOperator());
        RegisterContentOperator("RG", new PdfCanvasProcessor.SetRGBStrokeOperator());
        RegisterContentOperator("k", new PdfCanvasProcessor.SetCMYKFillOperator());
        RegisterContentOperator("K", new PdfCanvasProcessor.SetCMYKStrokeOperator());
        RegisterContentOperator("cs", new PdfCanvasProcessor.SetColorSpaceFillOperator());
        RegisterContentOperator("CS", new PdfCanvasProcessor.SetColorSpaceStrokeOperator());
        RegisterContentOperator("sc", new PdfCanvasProcessor.SetColorFillOperator());
        RegisterContentOperator("SC", new PdfCanvasProcessor.SetColorStrokeOperator());
        // "scn"/"SCN" share the handlers of "sc"/"SC".
        RegisterContentOperator("scn", new PdfCanvasProcessor.SetColorFillOperator());
        RegisterContentOperator("SCN", new PdfCanvasProcessor.SetColorStrokeOperator());
        RegisterContentOperator("gs", new PdfCanvasProcessor.ProcessGraphicsStateResourceOperator());
    }

    // Inline images.
    if (wantsImage)
    {
        RegisterContentOperator("EI", new PdfCanvasProcessor.EndImageOperator());
    }

    // Text object delimiters.
    if (wantsTextBlock)
    {
        RegisterContentOperator("BT", new PdfCanvasProcessor.BeginTextOperator());
        RegisterContentOperator("ET", new PdfCanvasProcessor.EndTextOperator());
    }

    // Text state, text positioning and text showing. Several handlers are
    // kept in locals because composite operators (TD, T*, ', ") are built
    // on top of the simpler ones.
    if (wantsText)
    {
        PdfCanvasProcessor.SetTextCharacterSpacingOperator charSpacingOp = new PdfCanvasProcessor.SetTextCharacterSpacingOperator();
        RegisterContentOperator("Tc", charSpacingOp);
        PdfCanvasProcessor.SetTextWordSpacingOperator wordSpacingOp = new PdfCanvasProcessor.SetTextWordSpacingOperator();
        RegisterContentOperator("Tw", wordSpacingOp);
        RegisterContentOperator("Tz", new PdfCanvasProcessor.SetTextHorizontalScalingOperator());
        PdfCanvasProcessor.SetTextLeadingOperator leadingOp = new PdfCanvasProcessor.SetTextLeadingOperator();
        RegisterContentOperator("TL", leadingOp);
        RegisterContentOperator("Tf", new PdfCanvasProcessor.SetTextFontOperator());
        RegisterContentOperator("Tr", new PdfCanvasProcessor.SetTextRenderModeOperator());
        RegisterContentOperator("Ts", new PdfCanvasProcessor.SetTextRiseOperator());

        PdfCanvasProcessor.TextMoveStartNextLineOperator moveOp = new PdfCanvasProcessor.TextMoveStartNextLineOperator();
        RegisterContentOperator("Td", moveOp);
        RegisterContentOperator("TD", new PdfCanvasProcessor.TextMoveStartNextLineWithLeadingOperator(moveOp, leadingOp));
        RegisterContentOperator("Tm", new PdfCanvasProcessor.TextSetTextMatrixOperator());
        PdfCanvasProcessor.TextMoveNextLineOperator nextLineOp = new PdfCanvasProcessor.TextMoveNextLineOperator(moveOp);
        RegisterContentOperator("T*", nextLineOp);

        PdfCanvasProcessor.ShowTextOperator showOp = new PdfCanvasProcessor.ShowTextOperator();
        RegisterContentOperator("Tj", showOp);
        PdfCanvasProcessor.MoveNextLineAndShowTextOperator quoteOp = new PdfCanvasProcessor.MoveNextLineAndShowTextOperator(nextLineOp, showOp);
        RegisterContentOperator("'", quoteOp);
        RegisterContentOperator("\"", new PdfCanvasProcessor.MoveNextLineAndShowTextWithSpacingOperator(wordSpacingOp, charSpacingOp, quoteOp));
        RegisterContentOperator("TJ", new PdfCanvasProcessor.ShowTextArrayOperator());
    }

    // Line style, path construction, path painting and clipping.
    if (wantsClip || wantsPath)
    {
        RegisterContentOperator("w", new PdfCanvasProcessor.SetLineWidthOperator());
        RegisterContentOperator("J", new PdfCanvasProcessor.SetLineCapOperator());
        RegisterContentOperator("j", new PdfCanvasProcessor.SetLineJoinOperator());
        RegisterContentOperator("M", new PdfCanvasProcessor.SetMiterLimitOperator());
        RegisterContentOperator("d", new PdfCanvasProcessor.SetLineDashPatternOperator());

        int fillAndStroke = PathRenderInfo.FILL | PathRenderInfo.STROKE;
        int nonZero = PdfCanvasConstants.FillingRule.NONZERO_WINDING;
        int evenOdd = PdfCanvasConstants.FillingRule.EVEN_ODD;

        RegisterContentOperator("m", new PdfCanvasProcessor.MoveToOperator());
        RegisterContentOperator("l", new PdfCanvasProcessor.LineToOperator());
        RegisterContentOperator("c", new PdfCanvasProcessor.CurveOperator());
        RegisterContentOperator("v", new PdfCanvasProcessor.CurveFirstPointDuplicatedOperator());
        RegisterContentOperator("y", new PdfCanvasProcessor.CurveFourhPointDuplicatedOperator());
        RegisterContentOperator("h", new PdfCanvasProcessor.CloseSubpathOperator());
        RegisterContentOperator("re", new PdfCanvasProcessor.RectangleOperator());

        // PaintPathOperator(operation, rule, close): -1 is passed as the rule
        // for operations that do not fill.
        RegisterContentOperator("S", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.STROKE, -1, false));
        RegisterContentOperator("s", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.STROKE, -1, true));
        RegisterContentOperator("f", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, nonZero, false));
        RegisterContentOperator("F", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, nonZero, false));
        RegisterContentOperator("f*", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.FILL, evenOdd, false));
        RegisterContentOperator("B", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, nonZero, false));
        RegisterContentOperator("B*", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, evenOdd, false));
        RegisterContentOperator("b", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, nonZero, true));
        RegisterContentOperator("b*", new PdfCanvasProcessor.PaintPathOperator(fillAndStroke, evenOdd, true));
        RegisterContentOperator("n", new PdfCanvasProcessor.PaintPathOperator(PathRenderInfo.NO_OP, -1, false));

        RegisterContentOperator("W", new PdfCanvasProcessor.ClipPathOperator(nonZero));
        RegisterContentOperator("W*", new PdfCanvasProcessor.ClipPathOperator(evenOdd));
    }
}
/// <summary>Displays the current path.</summary>
/// <param name="operation">
/// One of the possible combinations of
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.STROKE"/>
/// and
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.FILL"/>
/// values or
/// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.NO_OP"/>
/// </param>
/// <param name="rule">
/// Either
/// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
/// or
/// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>
/// In case it isn't applicable pass any <c>int</c> value.
/// </param>
public virtual void PaintPath(int operation, int rule)
{
    ParserGraphicsState state = GetGraphicsState();

    // Report the path first; the pending clip flag and clipping rule travel
    // with the render info.
    EventOccurred(
        new PathRenderInfo(this.markedContentStack, state, currentPath, operation, rule, isClip, clippingRule),
        EventType.RENDER_PATH);

    // A pending clip request is consumed here: the current path is applied
    // to the graphics state and the resulting clipping path is reported.
    if (isClip)
    {
        isClip = false;
        state.Clip(currentPath, clippingRule);
        ClippingPathInfo clipInfo = new ClippingPathInfo(state, state.GetClippingPath(), state.GetCtm());
        EventOccurred(clipInfo, EventType.CLIP_PATH_CHANGED);
    }

    // Painting ends the current path; subsequent construction starts afresh.
    currentPath = new Path();
}
/// <summary>Invokes an operator.</summary>
/// <param name="operator">the PDF Syntax of the operator</param>
/// <param name="operands">a list with operands</param>
public virtual void InvokeOperator(PdfLiteral @operator, IList<PdfObject> operands)
{
    // Look the handler up by the operator's textual form; operators without
    // a registered handler fall back to the one stored under DEFAULT_OPERATOR.
    IContentOperator handler = operators.Get(@operator.ToString()) ?? operators.Get(DEFAULT_OPERATOR);
    handler.Invoke(this, @operator, operands);
}
/// <summary>Looks up an XObject stream by name in the XObject resources currently in effect.</summary>
/// <param name="xobjectName">the resource name of the XObject</param>
/// <returns>the result of asking the XObject resource dictionary for a stream stored under that name</returns>
public virtual PdfStream GetXObjectStream(PdfName xobjectName)
{
    return GetResources().GetResource(PdfName.XObject).GetAsStream(xobjectName);
}
/// <summary>Returns the resources entry most recently added to the resources stack.</summary>
/// <returns>the last element of <c>resourcesStack</c></returns>
public virtual PdfResources GetResources()
{
    int top = resourcesStack.Count - 1;
    return resourcesStack[top];
}
/// <summary>Registers the handlers used when an XObject is displayed, keyed by XObject subtype.</summary>
public virtual void PopulateXObjectDoHandlers()
{
    // The fallback and form handlers are always registered.
    RegisterXObjectDoHandler(PdfName.Default, new PdfCanvasProcessor.IgnoreXObjectDoHandler());
    RegisterXObjectDoHandler(PdfName.Form, new PdfCanvasProcessor.FormXObjectDoHandler());

    // The image handler is only registered when image events are wanted
    // (a null filter means everything is wanted).
    bool imagesRequested = supportedEvents == null || supportedEvents.Contains(EventType.RENDER_IMAGE);
    if (!imagesRequested)
    {
        return;
    }
    RegisterXObjectDoHandler(PdfName.Image, new PdfCanvasProcessor.ImageXObjectDoHandler());
}
/// <summary>
/// Creates a
/// <see cref="DETA7.Kernel.Font.PdfFont"/>
/// object by a font dictionary.
/// </summary>
/// <remarks>
/// Creates a
/// <see cref="DETA7.Kernel.Font.PdfFont"/>
/// object by a font dictionary. The font may have been cached in case
/// it is an indirect object.
/// </remarks>
/// <param name="fontDict">
/// the
/// <see cref="DETA7.Kernel.Pdf.PdfDictionary">font dictionary</see>
/// to create the font from
/// </param>
/// <returns>the created font</returns>
public virtual PdfFont GetFont(PdfDictionary fontDict)
{
    // A dictionary without an indirect reference has no object number to
    // key the cache on, so a font is created for it on every call.
    if (fontDict.GetIndirectReference() == null)
    {
        return PdfFontFactory.CreateFont(fontDict);
    }

    int objectNumber = fontDict.GetIndirectReference().GetObjNumber();

    // Cache entries are weak references; a missing entry and a cleared
    // target are both treated as "not cached".
    WeakReference cached = cachedFonts.Get(objectNumber);
    PdfFont font = null;
    if (cached != null)
    {
        font = (PdfFont)cached.Target;
    }
    if (font != null)
    {
        return font;
    }

    font = PdfFontFactory.CreateFont(fontDict);
    cachedFonts.Put(objectNumber, new WeakReference(font));
    return font;
}
/// <summary>Add to the marked content stack</summary>
/// <param name="tag">the tag of the marked content</param>
/// <param name="dict">the PdfDictionary associated with the marked content</param>
public virtual void BeginMarkedContent(PdfName tag, PdfDictionary dict)
{
    // Wrap the tag together with its properties dictionary and make it the
    // innermost marked-content entry.
    CanvasTag entry = new CanvasTag(tag).SetProperties(dict);
    markedContentStack.Push(entry);
}
/// <summary>Remove the latest marked content from the stack.</summary>
/// <remarks>Remove the latest marked content from the stack. Keeps track of the BMC, BDC and EMC operators.</remarks>
public virtual void EndMarkedContent()
{
    // Drop the innermost marked-content entry; the popped value is not used.
    this.markedContentStack.Pop();
}
/// <summary>Used to trigger beginTextBlock on the renderListener</summary>
public void BeginText()
{
    // BEGIN_TEXT is reported without any event data.
    EventOccurred(data: null, type: EventType.BEGIN_TEXT);
}
/// <summary>Used to trigger endTextBlock on the renderListener</summary>
public void EndText()
{
    // END_TEXT is reported without any event data.
    EventOccurred(data: null, type: EventType.END_TEXT);
}
/// <summary>This is a proxy to pass only those events to the event listener which are supported by it.</summary>
/// <param name="data">event data</param>
/// <param name="type">event type</param>
public virtual void EventOccurred(IEventData data, EventType type)
{
    // Forward the event only when the filter is absent or lists its type.
    bool listenerInterested = supportedEvents == null || supportedEvents.Contains(type);
    if (listenerInterested)
    {
        eventListener.EventOccurred(data, type);
    }

    // Render infos release their graphics state once the listener has had
    // its turn; this happens whether or not the event was forwarded.
    AbstractRenderInfo renderInfo = data as AbstractRenderInfo;
    if (renderInfo != null)
    {
        renderInfo.ReleaseGraphicsState();
    }
}
/// <summary>Displays text.</summary>
/// <param name="string">the text to display</param>
public void DisplayPdfString(PdfString @string)
{
    // The render info is built from the text matrix as it is before the
    // string is shown.
    TextRenderInfo renderInfo = new TextRenderInfo(@string, GetGraphicsState(), textMatrix, markedContentStack);

    // Advance the text matrix horizontally by the string's unscaled width;
    // this happens before the listener is notified.
    float advance = renderInfo.GetUnscaledWidth();
    Matrix shift = new Matrix(advance, 0);
    textMatrix = shift.Multiply(textMatrix);

    EventOccurred(renderInfo, EventType.RENDER_TEXT);
}
/// <summary>Displays an XObject using the registered handler for this XObject's subtype</summary>
/// <param name="resourceName">the name of the XObject to retrieve from the resource dictionary</param>
public void DisplayXObject(PdfName resourceName)
{
    PdfStream xobjectStream = GetXObjectStream(resourceName);

    // Handlers are keyed by the stream's Subtype; subtypes without their
    // own handler use the one registered under PdfName.Default.
    PdfName subType = xobjectStream.GetAsName(PdfName.Subtype);
    IXObjectDoHandler handler = xobjectDoHandlers.Get(subType) ?? xobjectDoHandlers.Get(PdfName.Default);

    handler.HandleXObject(this, this.markedContentStack, xobjectStream, resourceName);
}
/// <summary>Reports an image to the listener as a RENDER_IMAGE event.</summary>
/// <param name="canvasTagHierarchy">the marked-content tags passed on to the render info</param>
/// <param name="imageStream">the stream holding the image</param>
/// <param name="resourceName">the resource name passed on to the render info</param>
/// <param name="isInline">flag passed on to the render info telling whether the image is inline</param>
public void DisplayImage(Stack<CanvasTag> canvasTagHierarchy, PdfStream imageStream, PdfName resourceName , bool isInline)
{
    // The ColorSpace resource dictionary currently in effect accompanies
    // the image in the render info.
    PdfDictionary colorSpaceDic = GetResources().GetResource(PdfName.ColorSpace);

    ImageRenderInfo renderInfo = new ImageRenderInfo(
        canvasTagHierarchy,
        GetGraphicsState(),
        GetGraphicsState().GetCtm(),
        imageStream,
        resourceName,
        colorSpaceDic,
        isInline);

    EventOccurred(renderInfo, EventType.RENDER_IMAGE);
}
/// <summary>Adjusts the text matrix for the specified adjustment value (see TJ operator in the PDF spec for information)
/// </summary>
/// <param name="tj">the text adjustment</param>
public void ApplyTextAdjust(float tj)
{
    // The adjustment is negated, converted by FontProgram, then scaled by
    // the font size and by the horizontal scaling taken as a percentage.
    float converted = FontProgram.ConvertTextSpaceToGlyphSpace(-tj);
    float fontSize = GetGraphicsState().GetFontSize();
    float horizontalFactor = GetGraphicsState().GetHorizontalScaling() / 100F;
    float adjustBy = converted * fontSize * horizontalFactor;

    // Shift the text matrix horizontally by the computed amount.
    Matrix shift = new Matrix(adjustBy, 0);
    textMatrix = shift.Multiply(textMatrix);
}
/// <summary>Sets the clipping path of the current graphics state to the page's crop box rectangle.</summary>
/// <param name="page">the page whose crop box is used</param>
public void InitClippingPath(PdfPage page)
{
    Path cropBoxOutline = new Path();
    cropBoxOutline.Rectangle(page.GetCropBox());
    GetGraphicsState().SetClippingPath(cropBoxOutline);
}
/// <summary>A handler that implements operator (unregistered).</summary>
public class IgnoreOperator : IContentOperator
{
    /// <summary>Does nothing; the operator and its operands are ignored.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Intentionally empty.
    }
}
/// <summary>A handler that implements operator (TJ).</summary>
/// <remarks>A handler that implements operator (TJ). For more information see Table 51 ISO-32000-1</remarks>
public class ShowTextArrayOperator : IContentOperator
{
    /// <summary>
    /// Walks the array given as first operand: string entries are displayed,
    /// every other entry is read as a number and applied as a text adjustment.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfArray entries = (PdfArray)operands[0];
        foreach (PdfObject entry in entries)
        {
            PdfString text = entry as PdfString;
            if (text != null)
            {
                processor.DisplayPdfString(text);
                continue;
            }
            // Anything that is not a string must be a number.
            processor.ApplyTextAdjust(((PdfNumber)entry).FloatValue());
        }
    }
}
/// <summary>A handler that implements operator (").</summary>
/// <remarks>A handler that implements operator ("). For more information see Table 51 ISO-32000-1</remarks>
public class MoveNextLineAndShowTextWithSpacingOperator : IContentOperator
{
    public PdfCanvasProcessor.SetTextWordSpacingOperator setTextWordSpacing;
    public PdfCanvasProcessor.SetTextCharacterSpacingOperator setTextCharacterSpacing;
    public PdfCanvasProcessor.MoveNextLineAndShowTextOperator moveNextLineAndShowText;

    /// <summary>Builds the handler out of the three handlers it delegates to.</summary>
    /// <param name="setTextWordSpacing">the handler for Tw operator</param>
    /// <param name="setTextCharacterSpacing">the handler for Tc operator</param>
    /// <param name="moveNextLineAndShowText">the handler for ' operator</param>
    public MoveNextLineAndShowTextWithSpacingOperator(
        PdfCanvasProcessor.SetTextWordSpacingOperator setTextWordSpacing,
        PdfCanvasProcessor.SetTextCharacterSpacingOperator setTextCharacterSpacing,
        PdfCanvasProcessor.MoveNextLineAndShowTextOperator moveNextLineAndShowText)
    {
        this.setTextWordSpacing = setTextWordSpacing;
        this.setTextCharacterSpacing = setTextCharacterSpacing;
        this.moveNextLineAndShowText = moveNextLineAndShowText;
    }

    /// <summary>
    /// Splits the three operands (word spacing, character spacing, string)
    /// and replays them through the Tw, Tc and ' handlers, in that order.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // All three operands are cast up front, before any handler runs.
        PdfNumber wordSpacing = (PdfNumber)operands[0];
        PdfNumber charSpacing = (PdfNumber)operands[1];
        PdfString text = (PdfString)operands[2];

        // Each delegate receives a one-element operand list and a null operator.
        IList<PdfObject> wordSpacingOperands = new List<PdfObject>(1) { wordSpacing };
        setTextWordSpacing.Invoke(processor, null, wordSpacingOperands);

        IList<PdfObject> charSpacingOperands = new List<PdfObject>(1) { charSpacing };
        setTextCharacterSpacing.Invoke(processor, null, charSpacingOperands);

        IList<PdfObject> showOperands = new List<PdfObject>(1) { text };
        moveNextLineAndShowText.Invoke(processor, null, showOperands);
    }
}
/// <summary>A handler that implements operator (').</summary>
/// <remarks>A handler that implements operator ('). For more information see Table 51 ISO-32000-1</remarks>
public class MoveNextLineAndShowTextOperator : IContentOperator
{
    public PdfCanvasProcessor.TextMoveNextLineOperator textMoveNextLine;
    public PdfCanvasProcessor.ShowTextOperator showText;

    /// <summary>Builds the handler out of the two handlers it delegates to.</summary>
    /// <param name="textMoveNextLine">the handler for T* operator</param>
    /// <param name="showText">the handler for Tj operator</param>
    public MoveNextLineAndShowTextOperator(
        PdfCanvasProcessor.TextMoveNextLineOperator textMoveNextLine,
        PdfCanvasProcessor.ShowTextOperator showText)
    {
        this.textMoveNextLine = textMoveNextLine;
        this.showText = showText;
    }

    /// <summary>Runs the T* handler with an empty operand list, then the Tj handler with the received operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        IList<PdfObject> noOperands = new List<PdfObject>(0);
        textMoveNextLine.Invoke(processor, null, noOperands);
        showText.Invoke(processor, null, operands);
    }
}
/// <summary>A handler that implements operator (Tj).</summary>
/// <remarks>A handler that implements operator (Tj). For more information see Table 51 ISO-32000-1</remarks>
public class ShowTextOperator : IContentOperator
{
    /// <summary>Displays the string given as the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.DisplayPdfString((PdfString)operands[0]);
    }
}
/// <summary>A handler that implements operator (T*).</summary>
/// <remarks>A handler that implements operator (T*). For more information see Table 51 ISO-32000-1</remarks>
public class TextMoveNextLineOperator : IContentOperator
{
    public PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine;

    /// <summary>Builds the handler on top of the Td handler it delegates to.</summary>
    /// <param name="moveStartNextLine">the handler for Td operator</param>
    public TextMoveNextLineOperator(PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine)
    {
        this.moveStartNextLine = moveStartNextLine;
    }

    /// <summary>
    /// Invokes the Td handler with the operands (0, -leading), where leading
    /// is read from the current graphics state. The received operands are not used.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        IList<PdfObject> lineOffset = new List<PdfObject>(2)
        {
            new PdfNumber(0),
            new PdfNumber(-processor.GetGraphicsState().GetLeading())
        };
        moveStartNextLine.Invoke(processor, null, lineOffset);
    }
}
/// <summary>A handler that implements operator (Tm).</summary>
/// <remarks>A handler that implements operator (Tm). For more information see Table 51 ISO-32000-1</remarks>
public class TextSetTextMatrixOperator : IContentOperator
{
    /// <summary>
    /// Reads six numeric operands, builds a matrix from them and installs it
    /// as both the text line matrix and the text matrix of the processor.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // Operands 0..5 are read in order as the six matrix values.
        float[] values = new float[6];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = ((PdfNumber)operands[i]).FloatValue();
        }

        Matrix replacement = new Matrix(values[0], values[1], values[2], values[3], values[4], values[5]);

        // Both matrices end up referring to the same instance.
        processor.textLineMatrix = replacement;
        processor.textMatrix = replacement;
    }
}
/// <summary>A handler that implements operator (TD).</summary>
/// <remarks>A handler that implements operator (TD). For more information see Table 51 ISO-32000-1</remarks>
public class TextMoveStartNextLineWithLeadingOperator : IContentOperator
{
    public PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine;
    public PdfCanvasProcessor.SetTextLeadingOperator setTextLeading;

    /// <summary>Builds the handler out of the two handlers it delegates to.</summary>
    /// <param name="moveStartNextLine">the handler for Td operator</param>
    /// <param name="setTextLeading">the handler for TL operator</param>
    public TextMoveStartNextLineWithLeadingOperator(
        PdfCanvasProcessor.TextMoveStartNextLineOperator moveStartNextLine,
        PdfCanvasProcessor.SetTextLeadingOperator setTextLeading)
    {
        this.moveStartNextLine = moveStartNextLine;
        this.setTextLeading = setTextLeading;
    }

    /// <summary>
    /// Sets the leading to the negated second operand through the TL handler,
    /// then hands the original operands to the Td handler.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float verticalOffset = ((PdfNumber)operands[1]).FloatValue();

        IList<PdfObject> leadingOperands = new List<PdfObject>(1) { new PdfNumber(-verticalOffset) };
        setTextLeading.Invoke(processor, null, leadingOperands);

        moveStartNextLine.Invoke(processor, null, operands);
    }
}
/// <summary>A handler that implements operator (Td).</summary>
/// <remarks>A handler that implements operator (Td). For more information see Table 51 ISO-32000-1</remarks>
public class TextMoveStartNextLineOperator : IContentOperator
{
    /// <summary>
    /// Builds a translation from the first two operands, multiplies it with
    /// the current text line matrix and stores the product as both the text
    /// matrix and the text line matrix.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float offsetX = ((PdfNumber)operands[0]).FloatValue();
        float offsetY = ((PdfNumber)operands[1]).FloatValue();

        Matrix moved = new Matrix(offsetX, offsetY).Multiply(processor.textLineMatrix);

        // Both matrices end up referring to the same instance.
        processor.textMatrix = moved;
        processor.textLineMatrix = moved;
    }
}
/// <summary>A handler that implements operator (Tf).</summary>
/// <remarks>A handler that implements operator (Tf). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextFontOperator : IContentOperator
{
    /// <summary>
    /// Resolves the font named by the first operand in the Font resources
    /// and stores it, together with the size from the second operand, in the
    /// current graphics state.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName resourceName = (PdfName)operands[0];
        float fontSize = ((PdfNumber)operands[1]).FloatValue();

        // Font resource dictionary -> entry for this name -> font object.
        PdfDictionary fontResources = processor.GetResources().GetResource(PdfName.Font);
        PdfDictionary fontDictionary = fontResources.GetAsDictionary(resourceName);
        PdfFont resolvedFont = processor.GetFont(fontDictionary);

        processor.GetGraphicsState().SetFont(resolvedFont);
        processor.GetGraphicsState().SetFontSize(fontSize);
    }
}
/// <summary>A handler that implements operator (Tr).</summary>
/// <remarks>A handler that implements operator (Tr). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextRenderModeOperator : IContentOperator
{
    /// <summary>Stores the integer value of the first operand as the text rendering mode of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        int mode = ((PdfNumber)operands[0]).IntValue();
        processor.GetGraphicsState().SetTextRenderingMode(mode);
    }
}
/// <summary>A handler that implements operator (Ts).</summary>
/// <remarks>A handler that implements operator (Ts). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextRiseOperator : IContentOperator
{
    /// <summary>Stores the first operand as the text rise of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float riseValue = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetTextRise(riseValue);
    }
}
/// <summary>A handler that implements operator (TL).</summary>
/// <remarks>A handler that implements operator (TL). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextLeadingOperator : IContentOperator
{
    /// <summary>Stores the first operand as the leading of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float leadingValue = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetLeading(leadingValue);
    }
}
/// <summary>A handler that implements operator (Tz).</summary>
/// <remarks>A handler that implements operator (Tz). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextHorizontalScalingOperator : IContentOperator
{
    /// <summary>Stores the first operand as the horizontal scaling of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float scaleValue = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetHorizontalScaling(scaleValue);
    }
}
/// <summary>A handler that implements operator (Tc).</summary>
/// <remarks>A handler that implements operator (Tc). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextCharacterSpacingOperator : IContentOperator
{
    /// <summary>Stores the first operand as the character spacing of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float spacingValue = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetCharSpacing(spacingValue);
    }
}
/// <summary>A handler that implements operator (Tw).</summary>
/// <remarks>A handler that implements operator (Tw). For more information see Table 51 ISO-32000-1</remarks>
public class SetTextWordSpacingOperator : IContentOperator
{
    /// <summary>Stores the first operand as the word spacing of the current graphics state.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float spacingValue = ((PdfNumber)operands[0]).FloatValue();
        processor.GetGraphicsState().SetWordSpacing(spacingValue);
    }
}
/// <summary>A handler that implements operator (gs).</summary>
/// <remarks>A handler that implements operator (gs). For more information see Table 51 ISO-32000-1</remarks>
public class ProcessGraphicsStateResourceOperator : IContentOperator
{
    /// <summary>
    /// Looks up the graphics state parameter dictionary named by the first
    /// operand in the ExtGState resources and applies it to the current
    /// graphics state. A Font entry, when present, is applied separately as
    /// font and font size.
    /// </summary>
    /// <exception cref="PdfException">
    /// when the resources have no ExtGState entry, or when no dictionary or
    /// stream is stored under the requested name
    /// </exception>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName stateName = (PdfName)operands[0];

        PdfDictionary extGStates = processor.GetResources().GetResource(PdfName.ExtGState);
        if (extGStates == null)
        {
            throw new PdfException(KernelExceptionMessageConstant.RESOURCES_DO_NOT_CONTAIN_EXTGSTATE_ENTRY_UNABLE_TO_PROCESS_THIS_OPERATOR).SetMessageParams(@operator);
        }

        // The entry is accepted as a dictionary first, then as a stream.
        PdfDictionary parameters = extGStates.GetAsDictionary(stateName);
        if (parameters == null)
        {
            parameters = extGStates.GetAsStream(stateName);
        }
        if (parameters == null)
        {
            throw new PdfException(KernelExceptionMessageConstant.UNKNOWN_GRAPHICS_STATE_DICTIONARY).SetMessageParams(stateName);
        }

        // Font entry: element 0 is the font dictionary, element 1 the size.
        PdfArray fontEntry = parameters.GetAsArray(PdfName.Font);
        if (fontEntry != null)
        {
            PdfFont font = processor.GetFont(fontEntry.GetAsDictionary(0));
            float fontSize = fontEntry.GetAsNumber(1).FloatValue();
            processor.GetGraphicsState().SetFont(font);
            processor.GetGraphicsState().SetFontSize(fontSize);
        }

        // NOTE(review): Clone is handed a one-element list holding
        // PdfName.Font, presumably the keys to leave out of the copy, since
        // the font was already applied above. Confirm against PdfDictionary.Clone.
        PdfExtGState remainingState = new PdfExtGState(parameters.Clone(JavaCollectionsUtil.SingletonList(PdfName.Font)));
        processor.GetGraphicsState().UpdateFromExtGState(remainingState);
    }
}
/// <summary>A handler that implements operator (q).</summary>
/// <remarks>A handler that implements operator (q). For more information see Table 51 ISO-32000-1</remarks>
public class PushGraphicsStateOperator : IContentOperator
{
    /// <summary>Pushes a copy of the graphics state currently on top of the stack.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // The copy is made with ParserGraphicsState's copy constructor, so
        // the previous top entry stays on the stack underneath it.
        processor.gsStack.Push(new ParserGraphicsState(processor.gsStack.Peek()));
    }
}
/// <summary>A handler that implements operator (cm).</summary>
/// <remarks>A handler that implements operator (cm). For more information see Table 51 ISO-32000-1</remarks>
public class ModifyCurrentTransformationMatrixOperator : IContentOperator
{
    /// <summary>
    /// Reads the six numeric operands of the operator, records them in the
    /// <c>CTM_SAAN__a</c> .. <c>CTM_SAAN__f</c> members and updates the CTM of
    /// the current graphics state with the matrix built from them.
    /// </summary>
    /// <remarks>
    /// The <c>CTM_SAAN__*</c> members receive the operands of this single
    /// operator invocation, not the accumulated transformation; the
    /// accumulated one is maintained by the graphics state. They are written
    /// before the CTM update is attempted, so they are set even when that
    /// update fails.
    /// A <see cref="PdfException"/> whose inner exception is a
    /// <c>NoninvertibleTransformException</c> is logged and swallowed; any
    /// other <see cref="PdfException"/> is rethrown.
    /// </remarks>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float a = ((PdfNumber)operands[0]).FloatValue();
        float b = ((PdfNumber)operands[1]).FloatValue();
        float c = ((PdfNumber)operands[2]).FloatValue();
        float d = ((PdfNumber)operands[3]).FloatValue();
        float e = ((PdfNumber)operands[4]).FloatValue();
        float f = ((PdfNumber)operands[5]).FloatValue();
        Matrix matrix = new Matrix(a, b, c, d, e, f);
        // Matrix layout:
        //   a b 0
        //   c d 0
        //   e f 1
        // NOTE(review): reading Matrix.Ixy as row x, column y of this layout,
        // I11 = a, I12 = b, I13 = 0, I21 = c, I22 = d, I23 = 0, I31 = e,
        // I32 = f, I33 = 1 - confirm against the Matrix class.

        // Record the operands of this invocation. The values are assigned
        // directly; zeroing them first had no observable effect because each
        // one was overwritten immediately afterwards.
        CTM_SAAN__a = a;
        CTM_SAAN__b = b;
        CTM_SAAN__c = c;
        CTM_SAAN__d = d;
        CTM_SAAN__e = e;
        CTM_SAAN__f = f;

        try
        {
            processor.GetGraphicsState().UpdateCtm(matrix);
        }
        catch (PdfException exception)
        {
            // Only a non-invertible transformation is tolerated.
            if (!(exception.InnerException is NoninvertibleTransformException))
            {
                throw;
            }
            ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
            logger.LogError(MessageFormatUtil.Format(DETA7.IO.Logs.IoLogMessageConstant.FAILED_TO_PROCESS_A_TRANSFORMATION_MATRIX));
        }
    }
}
/// <summary>Gets a color based on a list of operands and Color space.</summary>
public static Color GetColor(PdfColorSpace pdfColorSpace, IList<PdfObject> operands, PdfResources resources)
{
    // Work on the direct object behind the color space.
    PdfObject pdfObject = pdfColorSpace.GetPdfObject();
    if (pdfObject.IsIndirectReference())
    {
        pdfObject = ((PdfIndirectReference)pdfObject).GetRefersTo();
    }

    if (pdfObject.IsName())
    {
        // Color spaces identified by a bare name.
        if (PdfName.DeviceGray.Equals(pdfObject))
        {
            return new DeviceGray(GetColorants(operands)[0]);
        }
        if (PdfName.Pattern.Equals(pdfObject))
        {
            // The pattern is looked up by the name given as first operand;
            // when that fails, control falls through to the warning below.
            PdfName requestedPattern = operands[0] as PdfName;
            if (requestedPattern != null)
            {
                PdfPattern namedPattern = resources.GetPattern(requestedPattern);
                if (namedPattern != null)
                {
                    return new PatternColor(namedPattern);
                }
            }
        }
        if (PdfName.DeviceRGB.Equals(pdfObject))
        {
            float[] rgb = GetColorants(operands);
            return new DeviceRgb(rgb[0], rgb[1], rgb[2]);
        }
        if (PdfName.DeviceCMYK.Equals(pdfObject))
        {
            float[] cmyk = GetColorants(operands);
            return new DeviceCmyk(cmyk[0], cmyk[1], cmyk[2], cmyk[3]);
        }
    }
    else if (pdfObject.IsArray())
    {
        // Color spaces described by an array; element 0 names the family.
        PdfName csType = ((PdfArray)pdfObject).GetAsName(0);
        if (PdfName.CalGray.Equals(csType))
        {
            return new CalGray((PdfCieBasedCs.CalGray)pdfColorSpace, GetColorants(operands)[0]);
        }
        if (PdfName.CalRGB.Equals(csType))
        {
            return new CalRgb((PdfCieBasedCs.CalRgb)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Lab.Equals(csType))
        {
            return new Lab((PdfCieBasedCs.Lab)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.ICCBased.Equals(csType))
        {
            return new IccBased((PdfCieBasedCs.IccBased)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Indexed.Equals(csType))
        {
            return new Indexed(pdfColorSpace, (int)GetColorants(operands)[0]);
        }
        if (PdfName.Separation.Equals(csType))
        {
            return new Separation((PdfSpecialCs.Separation)pdfColorSpace, GetColorants(operands)[0]);
        }
        if (PdfName.DeviceN.Equals(csType))
        {
            return new DeviceN((PdfSpecialCs.DeviceN)pdfColorSpace, GetColorants(operands));
        }
        if (PdfName.Pattern.Equals(csType))
        {
            // The second-to-last operand is taken out of a copy of the
            // operand list and treated as the pattern name; the rest of the
            // copy supplies the colorants for the underlying color space.
            IList<PdfObject> underlyingOperands = new List<PdfObject>(operands);
            PdfObject removedOperand = underlyingOperands.JRemoveAt(operands.Count - 2);
            PdfColorSpace underlyingCs = ((PdfSpecialCs.UncoloredTilingPattern)pdfColorSpace).GetUnderlyingColorSpace();
            PdfName tilingName = removedOperand as PdfName;
            if (tilingName != null)
            {
                // Only tiling patterns that are not colored are accepted here.
                PdfPattern.Tiling tiling = resources.GetPattern(tilingName) as PdfPattern.Tiling;
                if (tiling != null && !tiling.IsColored())
                {
                    return new PatternColor(tiling, underlyingCs, GetColorants(underlyingOperands));
                }
            }
        }
    }

    // Nothing matched: warn and report the absence of a color with null.
    ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
    logger.LogWarning(MessageFormatUtil.Format(KernelLogMessageConstant.UNABLE_TO_PARSE_COLOR_WITHIN_COLORSPACE
        , JavaUtil.ArraysToString((Object[])operands.ToArray()), pdfColorSpace.GetPdfObject()));
    return null;
}
/// <summary>Builds a device color from the leading numeric operands.</summary>
/// <remarks>
/// The first <paramref name="nOperands"/> entries of <paramref name="operands"/> are cast to
/// <c>PdfNumber</c> and read as floats before the component count is inspected.
/// </remarks>
/// <param name="nOperands">
/// number of color components to read: 1 yields <c>DeviceGray</c>, 3 yields <c>DeviceRgb</c>,
/// 4 yields <c>DeviceCmyk</c>
/// </param>
/// <param name="operands">operand list whose first <paramref name="nOperands"/> entries are the components</param>
/// <returns>the resulting color, or <c>null</c> for any other component count</returns>
public static Color GetColor(int nOperands, IList<PdfObject> operands)
{
    float[] components = new float[nOperands];
    int index = 0;
    while (index < nOperands)
    {
        components[index] = ((PdfNumber)operands[index]).FloatValue();
        index++;
    }
    if (nOperands == 1)
    {
        return new DeviceGray(components[0]);
    }
    if (nOperands == 3)
    {
        return new DeviceRgb(components[0], components[1], components[2]);
    }
    if (nOperands == 4)
    {
        return new DeviceCmyk(components[0], components[1], components[2], components[3]);
    }
    // Any other component count has no matching device color here.
    return null;
}
/// <summary>Reads every operand except the last one as a float.</summary>
/// <remarks>
/// The final element of <paramref name="operands"/> is skipped
/// (presumably it is the operator literal rather than a colorant - TODO confirm against the caller).
/// </remarks>
/// <param name="operands">operand list; all entries but the last are cast to <c>PdfNumber</c></param>
/// <returns>an array holding <c>operands.Count - 1</c> float values, in operand order</returns>
public static float[] GetColorants(IList<PdfObject> operands)
{
    float[] colorants = new float[operands.Count - 1];
    int position = 0;
    while (position < operands.Count - 1)
    {
        PdfNumber number = (PdfNumber)operands[position];
        colorants[position] = number.FloatValue();
        position++;
    }
    return colorants;
}
/// <summary>A handler that implements operator (Q).</summary>
/// <remarks>
/// Pops the processor's graphics state stack and then reports the clipping path of the state
/// returned by <c>GetGraphicsState()</c> after the pop. For more information see Table 51 ISO-32000-1
/// </remarks>
public class PopGraphicsStateOperator : IContentOperator
{
    /// <summary>Drops the top graphics state and raises a <c>CLIP_PATH_CHANGED</c> event.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.gsStack.Pop();
        var restoredState = processor.GetGraphicsState();
        ClippingPathInfo clipInfo = new ClippingPathInfo(
            restoredState,
            restoredState.GetClippingPath(),
            restoredState.GetCtm());
        processor.EventOccurred(clipInfo, EventType.CLIP_PATH_CHANGED);
    }
}
/// <summary>A handler that implements operator (g).</summary>
/// <remarks>
/// Stores the single-component color produced by <c>GetColor(1, operands)</c> as the fill color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetGrayFillOperator : IContentOperator
{
    /// <summary>Reads one color component from the operands and applies it as the fill color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color gray = GetColor(1, operands);
        state.SetFillColor(gray);
    }
}
/// <summary>A handler that implements operator (G).</summary>
/// <remarks>
/// Stores the single-component color produced by <c>GetColor(1, operands)</c> as the stroke color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetGrayStrokeOperator : IContentOperator
{
    /// <summary>Reads one color component from the operands and applies it as the stroke color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color gray = GetColor(1, operands);
        state.SetStrokeColor(gray);
    }
}
/// <summary>A handler that implements operator (rg).</summary>
/// <remarks>
/// Stores the three-component color produced by <c>GetColor(3, operands)</c> as the fill color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetRGBFillOperator : IContentOperator
{
    /// <summary>Reads three color components from the operands and applies them as the fill color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color rgb = GetColor(3, operands);
        state.SetFillColor(rgb);
    }
}
/// <summary>A handler that implements operator (RG).</summary>
/// <remarks>
/// Stores the three-component color produced by <c>GetColor(3, operands)</c> as the stroke color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetRGBStrokeOperator : IContentOperator
{
    /// <summary>Reads three color components from the operands and applies them as the stroke color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color rgb = GetColor(3, operands);
        state.SetStrokeColor(rgb);
    }
}
/// <summary>A handler that implements operator (k).</summary>
/// <remarks>
/// Stores the four-component color produced by <c>GetColor(4, operands)</c> as the fill color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetCMYKFillOperator : IContentOperator
{
    /// <summary>Reads four color components from the operands and applies them as the fill color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color cmyk = GetColor(4, operands);
        state.SetFillColor(cmyk);
    }
}
/// <summary>A handler that implements operator (K).</summary>
/// <remarks>
/// Stores the four-component color produced by <c>GetColor(4, operands)</c> as the stroke color of the
/// current graphics state. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetCMYKStrokeOperator : IContentOperator
{
    /// <summary>Reads four color components from the operands and applies them as the stroke color.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        Color cmyk = GetColor(4, operands);
        state.SetStrokeColor(cmyk);
    }
}
/// <summary>A handler for the color space operator that affects the fill color.</summary>
/// <remarks>
/// NOTE(review): this comment previously named the operator as (CS). ISO-32000-1 assigns <c>cs</c> to
/// non-stroking (fill) operations and <c>CS</c> to stroking ones, so the label looked swapped - confirm
/// against the operator registration. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetColorSpaceFillOperator : IContentOperator
{
    /// <summary>
    /// Resolves the color space named by the first operand and sets the fill color to
    /// <c>Color.MakeColor</c> of that color space.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName spaceName = (PdfName)operands[0];
        PdfColorSpace fillSpace = DetermineColorSpace(spaceName, processor);
        var state = processor.GetGraphicsState();
        state.SetFillColor(Color.MakeColor(fillSpace));
    }
    //\cond DO_NOT_DOCUMENT
    /// <summary>Maps a color space name to a <c>PdfColorSpace</c> instance.</summary>
    /// <param name="colorSpace">name of the color space taken from the content stream</param>
    /// <param name="processor">processor whose current resources are consulted for named color spaces</param>
    /// <returns>
    /// the color space built directly from the name when it is listed in
    /// <c>PdfColorSpace.DIRECT_COLOR_SPACES</c>, otherwise the one built from the matching entry of the
    /// resources' <c>ColorSpace</c> dictionary
    /// </returns>
    public static PdfColorSpace DetermineColorSpace(PdfName colorSpace, PdfCanvasProcessor processor)
    {
        if (PdfColorSpace.DIRECT_COLOR_SPACES.Contains(colorSpace))
        {
            return PdfColorSpace.MakeColorSpace(colorSpace);
        }
        // Not a direct color space: look the name up in the current resources.
        PdfResources currentResources = processor.GetResources();
        PdfDictionary namedSpaces = currentResources.GetPdfObject().GetAsDictionary(PdfName.ColorSpace);
        return PdfColorSpace.MakeColorSpace(namedSpaces.Get(colorSpace));
    }
    //\endcond
}
/// <summary>A handler for the color space operator that affects the stroke color.</summary>
/// <remarks>
/// NOTE(review): this comment previously named the operator as (cs). ISO-32000-1 assigns <c>CS</c> to
/// stroking operations and <c>cs</c> to non-stroking ones, so the label looked swapped - confirm
/// against the operator registration. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetColorSpaceStrokeOperator : IContentOperator
{
    /// <summary>
    /// Resolves the color space named by the first operand via
    /// <c>SetColorSpaceFillOperator.DetermineColorSpace</c> and sets the stroke color to
    /// <c>Color.MakeColor</c> of that color space.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName spaceName = (PdfName)operands[0];
        PdfColorSpace strokeSpace =
            PdfCanvasProcessor.SetColorSpaceFillOperator.DetermineColorSpace(spaceName, processor);
        var state = processor.GetGraphicsState();
        state.SetStrokeColor(Color.MakeColor(strokeSpace));
    }
}
/// <summary>A handler that implements operator (sc / scn).</summary>
/// <remarks>
/// Parses the operands within the color space of the current fill color and stores the result as the
/// new fill color. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetColorFillOperator : IContentOperator
{
    /// <summary>Replaces the fill color with the color parsed from the operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var targetState = processor.GetGraphicsState();
        // The graphics state is queried a second time here, matching the original call sequence.
        var fillSpace = processor.GetGraphicsState().GetFillColor().GetColorSpace();
        Color parsed = GetColor(fillSpace, operands, processor.GetResources());
        targetState.SetFillColor(parsed);
    }
}
/// <summary>A handler that implements operator (SC / SCN).</summary>
/// <remarks>
/// Parses the operands within the color space of the current stroke color and stores the result as the
/// new stroke color. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetColorStrokeOperator : IContentOperator
{
    /// <summary>Replaces the stroke color with the color parsed from the operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var targetState = processor.GetGraphicsState();
        // The graphics state is queried a second time here, matching the original call sequence.
        var strokeSpace = processor.GetGraphicsState().GetStrokeColor().GetColorSpace();
        Color parsed = GetColor(strokeSpace, operands, processor.GetResources());
        targetState.SetStrokeColor(parsed);
    }
}
/// <summary>A handler that implements operator (BT).</summary>
/// <remarks>
/// Points both the text matrix and the text line matrix at one newly constructed <c>Matrix</c> and then
/// notifies the processor that a text block begins. For more information see Table 51 ISO-32000-1
/// </remarks>
public class BeginTextOperator : IContentOperator
{
    /// <summary>Resets the processor's text matrices and calls <c>BeginText()</c>.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        // A single default-constructed Matrix (presumably identity - confirm) is shared by both fields.
        processor.textLineMatrix = processor.textMatrix = new Matrix();
        processor.BeginText();
    }
}
/// <summary>A handler that implements operator (ET).</summary>
/// <remarks>
/// Clears the text matrix and the text line matrix and then notifies the processor that the text block
/// ended. For more information see Table 51 ISO-32000-1
/// </remarks>
public class EndTextOperator : IContentOperator
{
    /// <summary>Nulls out the processor's text matrices and calls <c>EndText()</c>.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.textLineMatrix = processor.textMatrix = null;
        processor.EndText();
    }
}
/// <summary>A handler that implements operator (BMC).</summary>
/// <remarks>
/// Opens a marked content section identified by the first operand, without a properties dictionary.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class BeginMarkedContentOperator : IContentOperator
{
    /// <summary>Calls <c>BeginMarkedContent</c> with the tag operand and <c>null</c> properties.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfName tag = (PdfName)operands[0];
        processor.BeginMarkedContent(tag, null);
    }
}
/// <summary>A handler that implements operator (BDC).</summary>
/// <remarks>
/// Opens a marked content section identified by the first operand, with the properties dictionary
/// resolved from the second operand. For more information see Table 51 ISO-32000-1
/// </remarks>
public class BeginMarkedContentDictionaryOperator : IContentOperator
{
    /// <summary>
    /// Calls <c>BeginMarkedContent</c> with the tag operand and the dictionary returned by
    /// <see cref="GetPropertiesDictionary"/> for the second operand.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfObject properties = operands[1];
        processor.BeginMarkedContent(
            (PdfName)operands[0],
            GetPropertiesDictionary(properties, processor.GetResources()));
    }
    //\cond DO_NOT_DOCUMENT
    /// <summary>Resolves the second BDC operand to a properties dictionary.</summary>
    /// <param name="operand1">
    /// either the dictionary itself, or a <c>PdfName</c> to look up in the <c>Properties</c> resource
    /// </param>
    /// <param name="resources">resources that supply the <c>Properties</c> dictionary for name lookups</param>
    /// <returns>
    /// the operand when it is a dictionary; otherwise the named entry of the <c>Properties</c> resource,
    /// or <c>null</c> (after logging a warning) when the resource or the entry is missing
    /// </returns>
    public virtual PdfDictionary GetPropertiesDictionary(PdfObject operand1, PdfResources resources)
    {
        if (operand1.IsDictionary())
        {
            return (PdfDictionary)operand1;
        }
        PdfName dictionaryName = (PdfName)operand1;
        PdfDictionary properties = resources.GetResource(PdfName.Properties);
        if (null == properties)
        {
            WarnMissingPropertyDictionary(PdfName.Properties);
            return null;
        }
        PdfDictionary propertiesDictionary = properties.GetAsDictionary(dictionaryName);
        if (null == propertiesDictionary)
        {
            WarnMissingPropertyDictionary(dictionaryName);
        }
        // Return the value already fetched instead of repeating the dictionary lookup.
        return propertiesDictionary;
    }
    /// <summary>Logs the "not existing property dictionary" warning for the given name.</summary>
    private static void WarnMissingPropertyDictionary(PdfName missingName)
    {
        ILogger logger = DETA7LogManager.GetLogger(typeof(PdfCanvasProcessor));
        logger.LogWarning(MessageFormatUtil.Format(DETA7.IO.Logs.IoLogMessageConstant.PDF_REFERS_TO_NOT_EXISTING_PROPERTY_DICTIONARY
            , missingName));
    }
    //\endcond
}
/// <summary>A handler that implements operator (EMC).</summary>
/// <remarks>
/// Delegates to the processor's <c>EndMarkedContent()</c>; the operands are not used.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class EndMarkedContentOperator : IContentOperator
{
    /// <summary>Tells the processor that the current marked content section ends.</summary>
    public virtual void Invoke(
        PdfCanvasProcessor processor,
        PdfLiteral @operator,
        IList<PdfObject> operands)
    {
        processor.EndMarkedContent();
    }
}
/// <summary>A handler that implements operator (Do).</summary>
/// <remarks>
/// Passes the XObject name given as the first operand to the processor's <c>DisplayXObject</c>.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class DoOperator : IContentOperator
{
    /// <summary>Displays the XObject whose resource name is the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.DisplayXObject((PdfName)operands[0]);
    }
}
/// <summary>A handler that implements operator (EI).</summary>
/// <remarks>
/// A handler that implements operator (EI). For more information see Table 51 ISO-32000-1
/// BI and ID operators are parsed along with this operator.
/// This is not a usual operator: it has a single operand, a PdfStream object which
/// encapsulates the inline image dictionary and bytes.
/// </remarks>
public class EndImageOperator : IContentOperator
{
    /// <summary>
    /// Hands the image stream operand to <c>DisplayImage</c> together with the processor's marked
    /// content stack, a <c>null</c> resource name and <c>true</c> as the last argument.
    /// </summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        PdfStream inlineImage = (PdfStream)operands[0];
        var tagHierarchy = processor.markedContentStack;
        processor.DisplayImage(tagHierarchy, inlineImage, null, true);
    }
}
/// <summary>A handler that implements operator (w).</summary>
/// <remarks>
/// Reads the first operand as a float and stores it as the line width of the current graphics state.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetLineWidthOperator : IContentOperator
{
    /// <summary>Applies the line width given by the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber widthOperand = (PdfNumber)operands[0];
        float width = widthOperand.FloatValue();
        var state = processor.GetGraphicsState();
        state.SetLineWidth(width);
    }
}
/// <summary>A handler that implements operator (J).</summary>
/// <remarks>
/// Reads the first operand as an int and stores it as the line cap style of the current graphics state.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetLineCapOperator : IContentOperator
{
    /// <summary>Applies the line cap style given by the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber capOperand = (PdfNumber)operands[0];
        int capStyle = capOperand.IntValue();
        var state = processor.GetGraphicsState();
        state.SetLineCapStyle(capStyle);
    }
}
/// <summary>A handler that implements operator (j).</summary>
/// <remarks>
/// Reads the first operand as an int and stores it as the line join style of the current graphics state.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetLineJoinOperator : IContentOperator
{
    /// <summary>Applies the line join style given by the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber joinOperand = (PdfNumber)operands[0];
        int joinStyle = joinOperand.IntValue();
        var state = processor.GetGraphicsState();
        state.SetLineJoinStyle(joinStyle);
    }
}
/// <summary>A handler that implements operator (M).</summary>
/// <remarks>
/// Reads the first operand as a float and stores it as the miter limit of the current graphics state.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetMiterLimitOperator : IContentOperator
{
    /// <summary>Applies the miter limit given by the first operand.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        PdfNumber limitOperand = (PdfNumber)operands[0];
        float limit = limitOperand.FloatValue();
        var state = processor.GetGraphicsState();
        state.SetMiterLimit(limit);
    }
}
/// <summary>A handler that implements operator (d).</summary>
/// <remarks>
/// Wraps the first two operands in a new <c>PdfArray</c> and stores it as the dash pattern of the current
/// graphics state. Per the <c>d</c> operator syntax in ISO-32000-1 these operands are the dash array and
/// the dash phase. For more information see Table 51 ISO-32000-1
/// </remarks>
public class SetLineDashPatternOperator : IContentOperator
{
    /// <summary>Applies the dash pattern formed by the first two operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral oper, IList<PdfObject> operands)
    {
        var state = processor.GetGraphicsState();
        PdfObject dashArray = operands[0];
        PdfObject dashPhase = operands[1];
        PdfArray pattern = new PdfArray(JavaUtil.ArraysAsList(dashArray, dashPhase));
        state.SetDashPattern(pattern);
    }
}
/// <summary>An XObject subtype handler for FORM</summary>
/// <remarks>
/// Processes the form's content bytes between a push and a pop of the graphics state, after applying
/// the form's <c>Matrix</c> entry to the CTM when that entry is present.
/// </remarks>
public class FormXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Runs the content stream of a form XObject through the processor.</summary>
    /// <param name="processor">processor that receives the form's content</param>
    /// <param name="canvasTagHierarchy">not used by this handler</param>
    /// <param name="xObjectStream">the form XObject stream</param>
    /// <param name="xObjectName">not used by this handler</param>
    public virtual void HandleXObject(PdfCanvasProcessor processor, Stack<CanvasTag> canvasTagHierarchy, PdfStream
        xObjectStream, PdfName xObjectName)
    {
        // Use the form's own resources when it declares them, otherwise the processor's current ones.
        PdfDictionary ownResources = xObjectStream.GetAsDictionary(PdfName.Resources);
        PdfResources resources = ownResources != null
            ? new PdfResources(ownResources)
            : processor.GetResources();
        // The content bytes are read up here so that a failure does not leave the graphics state stack
        // corrupted. This is probably not necessary (if we fail on this, probably the entire content
        // stream processing operation should be rejected).
        byte[] formContent = xObjectStream.GetBytes();
        PdfArray matrixEntry = xObjectStream.GetAsArray(PdfName.Matrix);
        new PdfCanvasProcessor.PushGraphicsStateOperator().Invoke(processor, null, null);
        if (matrixEntry != null)
        {
            float[] m = new float[6];
            for (int i = 0; i < m.Length; i++)
            {
                m[i] = matrixEntry.GetAsNumber(i).FloatValue();
            }
            Matrix formMatrix = new Matrix(m[0], m[1], m[2], m[3], m[4], m[5]);
            processor.GetGraphicsState().UpdateCtm(formMatrix);
        }
        processor.ProcessContent(formContent, resources);
        new PdfCanvasProcessor.PopGraphicsStateOperator().Invoke(processor, null, null);
    }
}
/// <summary>An XObject subtype handler for IMAGE</summary>
/// <remarks>Forwards the stream to the processor's <c>DisplayImage</c>.</remarks>
public class ImageXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Displays the image XObject under the given tag hierarchy and resource name.</summary>
    /// <param name="processor">processor that displays the image</param>
    /// <param name="canvasTagHierarchy">tag hierarchy passed through to <c>DisplayImage</c></param>
    /// <param name="xObjectStream">the image XObject stream</param>
    /// <param name="resourceName">resource name passed through to <c>DisplayImage</c></param>
    public virtual void HandleXObject(
        PdfCanvasProcessor processor,
        Stack<CanvasTag> canvasTagHierarchy,
        PdfStream xObjectStream,
        PdfName resourceName)
    {
        // The last argument is false here, whereas the EI (inline image) handler in this file passes true.
        processor.DisplayImage(canvasTagHierarchy, xObjectStream, resourceName, false);
    }
}
/// <summary>An XObject subtype handler that does nothing</summary>
public class IgnoreXObjectDoHandler : IXObjectDoHandler
{
    /// <summary>Intentionally empty: the XObject is ignored.</summary>
    public virtual void HandleXObject(
        PdfCanvasProcessor processor,
        Stack<CanvasTag> canvasTagHierarchy,
        PdfStream xObjectStream,
        PdfName xObjectName)
    {
        // ignore XObject subtype
    }
}
/// <summary>A handler that implements operator (m).</summary>
/// <remarks>
/// Reads the first two operands as floats and passes them to <c>MoveTo</c> on the processor's current
/// path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class MoveToOperator : IContentOperator
{
    /// <summary>Calls <c>currentPath.MoveTo</c> with the two coordinate operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] point = new float[2];
        for (int i = 0; i < point.Length; i++)
        {
            point[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.MoveTo(point[0], point[1]);
    }
}
/// <summary>A handler that implements operator (l).</summary>
/// <remarks>
/// Reads the first two operands as floats and passes them to <c>LineTo</c> on the processor's current
/// path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class LineToOperator : IContentOperator
{
    /// <summary>Calls <c>currentPath.LineTo</c> with the two coordinate operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] point = new float[2];
        for (int i = 0; i < point.Length; i++)
        {
            point[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.LineTo(point[0], point[1]);
    }
}
/// <summary>A handler that implements operator (c).</summary>
/// <remarks>
/// Reads the first six operands as floats and passes them, in operand order, to the six-argument
/// <c>CurveTo</c> on the processor's current path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class CurveOperator : IContentOperator
{
    /// <summary>Calls <c>currentPath.CurveTo</c> with the six coordinate operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] coords = new float[6];
        for (int i = 0; i < coords.Length; i++)
        {
            coords[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.CurveTo(coords[0], coords[1], coords[2], coords[3], coords[4], coords[5]);
    }
}
/// <summary>A handler that implements operator (v).</summary>
/// <remarks>
/// Reads the first four operands as floats and passes them, in operand order, to the four-argument
/// <c>CurveTo</c> on the processor's current path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class CurveFirstPointDuplicatedOperator : IContentOperator
{
    /// <summary>Calls the four-argument <c>currentPath.CurveTo</c> with the coordinate operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] coords = new float[4];
        for (int i = 0; i < coords.Length; i++)
        {
            coords[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.CurveTo(coords[0], coords[1], coords[2], coords[3]);
    }
}
/// <summary>A handler that implements operator (y).</summary>
/// <remarks>
/// Reads the first four operands as floats and passes them, in operand order, to <c>CurveFromTo</c>
/// on the processor's current path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class CurveFourhPointDuplicatedOperator : IContentOperator
{
    /// <summary>Calls <c>currentPath.CurveFromTo</c> with the four coordinate operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] coords = new float[4];
        for (int i = 0; i < coords.Length; i++)
        {
            coords[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.CurveFromTo(coords[0], coords[1], coords[2], coords[3]);
    }
}
/// <summary>A handler that implements operator (h).</summary>
/// <remarks>
/// Calls <c>CloseSubpath()</c> on the processor's current path; the operands are not used.
/// For more information see Table 51 ISO-32000-1
/// </remarks>
public class CloseSubpathOperator : IContentOperator
{
    /// <summary>Closes the current subpath of the processor's current path.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        var path = processor.currentPath;
        path.CloseSubpath();
    }
}
/// <summary>A handler that implements operator (re).</summary>
/// <remarks>
/// Reads the first four operands as floats and passes them, in operand order, to <c>Rectangle</c>
/// on the processor's current path. For more information see Table 51 ISO-32000-1
/// </remarks>
public class RectangleOperator : IContentOperator
{
    /// <summary>Calls <c>currentPath.Rectangle</c> with the four numeric operands.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        float[] rect = new float[4];
        for (int i = 0; i < rect.Length; i++)
        {
            rect[i] = ((PdfNumber)operands[i]).FloatValue();
        }
        processor.currentPath.Rectangle(rect[0], rect[1], rect[2], rect[3]);
    }
}
/// <summary>A handler that implements operator (S, s, f, F, f*, B, B*, b, b*).</summary>
/// <remarks>
/// Each instance carries the paint operation, the filling rule and a close flag it was constructed
/// with. For more information see Table 51 ISO-32000-1
/// </remarks>
public class PaintPathOperator : IContentOperator
{
    // Paint operation passed to PaintPath.
    public int operation;
    // Filling rule passed to PaintPath.
    public int rule;
    // When true, the current subpath is closed before painting.
    public bool close;
    /// <summary>Constructs a PaintPathOperator object.</summary>
    /// <param name="operation">
    /// One of the possible combinations of
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.STROKE"/>
    /// and
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.FILL"/>
    /// values or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.Parser.Data.PathRenderInfo.NO_OP"/>
    /// </param>
    /// <param name="rule">
    /// Either
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.NONZERO_WINDING"/>
    /// or
    /// <see cref="DETA7.Kernel.Pdf.Canvas.PdfCanvasConstants.FillingRule.EVEN_ODD"/>
    /// In case it isn't applicable pass any value.
    /// </param>
    /// <param name="close">Indicates whether the path should be closed or not.</param>
    public PaintPathOperator(int operation, int rule, bool close)
    {
        this.close = close;
        this.rule = rule;
        this.operation = operation;
    }
    /// <summary>Optionally closes the current subpath, then paints the path.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        bool mustClose = this.close;
        if (mustClose)
        {
            processor.currentPath.CloseSubpath();
        }
        processor.PaintPath(this.operation, this.rule);
    }
}
/// <summary>A handler that implements operator (W, W*).</summary>
/// <remarks>
/// Does not touch the path itself: it only sets the processor's clip flag and records the clipping
/// rule this instance was constructed with. For more information see Table 51 ISO-32000-1
/// </remarks>
public class ClipPathOperator : IContentOperator
{
    // Clipping rule copied to the processor on Invoke.
    public int rule;
    /// <summary>Creates the handler for one clipping rule.</summary>
    /// <param name="rule">value stored into the processor's <c>clippingRule</c> on invocation</param>
    public ClipPathOperator(int rule)
    {
        this.rule = rule;
    }
    /// <summary>Sets <c>isClip</c> to true and <c>clippingRule</c> to this handler's rule.</summary>
    public virtual void Invoke(PdfCanvasProcessor processor, PdfLiteral @operator, IList<PdfObject> operands)
    {
        processor.isClip = true;
        processor.clippingRule = this.rule;
    }
}
}
}